/* Data references and dependences detectors.
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass walks a given loop structure searching for array
   references.  The information about the array accesses is recorded
   in DATA_REFERENCE structures.

   The basic test for determining the dependences is:
   given two access functions chrec1 and chrec2 to a same array, and
   x and y two vectors from the iteration domain, the same element of
   the array is accessed twice at iterations x and y if and only if:
   |   chrec1 (x) == chrec2 (y).

   The goals of this analysis are:

   - to determine the independence: the relation between two
     independent accesses is qualified with the chrec_known (this
     information allows a loop parallelization),

   - when two data references access the same data, to qualify the
     dependence relation with classic dependence representations:

       - distance vectors
       - direction vectors
       - loop carried level dependence
       - polyhedron dependence
     or with the chains of recurrences based representation,

   - to define a knowledge base for storing the data dependence
     information,

   - to define an interface to access this data.

   Definitions:

   - subscript: given two array accesses a subscript is the tuple
   composed of the access functions for a given dimension.  Example:
   Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
   (f1, g1), (f2, g2), (f3, g3).

   - Diophantine equation: an equation whose coefficients and
   solutions are integer constants, for example the equation
   |   3*x + 2*y = 1
   has an integer solution x = 1 and y = -1.

   References:

   - "Advanced Compilation for High Performance Computing" by Randy
   Allen and Ken Kennedy.
   http://citeseer.ist.psu.edu/goff91practical.html

   - "Loop Transformations for Restructuring Compilers - The Foundations"
   by Utpal Banerjee.  */
#define INCLUDE_ALGORITHM
#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-affine.h"
#include "internal-fn.h"
#include "vr-values.h"
#include "range-op.h"
#include "tree-ssa-loop-ivopts.h"
static struct datadep_stats
{
  int num_dependence_tests;
  int num_dependence_dependent;
  int num_dependence_independent;
  int num_dependence_undetermined;

  int num_subscript_tests;
  int num_subscript_undetermined;
  int num_same_subscript_function;

  int num_ziv;
  int num_ziv_independent;
  int num_ziv_dependent;
  int num_ziv_unimplemented;

  int num_siv;
  int num_siv_independent;
  int num_siv_dependent;
  int num_siv_unimplemented;

  int num_miv;
  int num_miv_independent;
  int num_miv_dependent;
  int num_miv_unimplemented;
} dependence_stats;
static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
					   unsigned int, unsigned int,
					   class loop *);
/* Returns true iff A divides B.  */

static inline bool
tree_fold_divides_p (const_tree a, const_tree b)
{
  gcc_assert (TREE_CODE (a) == INTEGER_CST);
  gcc_assert (TREE_CODE (b) == INTEGER_CST);
  return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a));
}
/* Returns true iff A divides B.  */

static inline bool
int_divides_p (lambda_int a, lambda_int b)
{
  return ((b % a) == 0);
}
/* Return true if reference REF contains a union access.  */

static bool
ref_contains_union_access_p (tree ref)
{
  while (handled_component_p (ref))
    {
      ref = TREE_OPERAND (ref, 0);
      if (TREE_CODE (TREE_TYPE (ref)) == UNION_TYPE
	  || TREE_CODE (TREE_TYPE (ref)) == QUAL_UNION_TYPE)
	return true;
    }
  return false;
}
/* Dump into FILE all the data references from DATAREFS.  */

static void
dump_data_references (FILE *file, vec<data_reference_p> datarefs)
{
  for (data_reference *dr : datarefs)
    dump_data_reference (file, dr);
}
/* Unified dump into FILE all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug (vec<data_reference_p> &ref)
{
  dump_data_references (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<data_reference_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}
/* Dump into STDERR all the data references from DATAREFS.  */

DEBUG_FUNCTION void
debug_data_references (vec<data_reference_p> datarefs)
{
  dump_data_references (stderr, datarefs);
}
/* Print to STDERR the data_reference DR.  */

DEBUG_FUNCTION void
debug_data_reference (struct data_reference *dr)
{
  dump_data_reference (stderr, dr);
}
/* Dump function for a DATA_REFERENCE structure.  */

void
dump_data_reference (FILE *outf,
		     struct data_reference *dr)
{
  unsigned int i;

  fprintf (outf, "#(Data Ref: \n");
  fprintf (outf, "#  bb: %d \n", gimple_bb (DR_STMT (dr))->index);
  fprintf (outf, "#  stmt: ");
  print_gimple_stmt (outf, DR_STMT (dr), 0);
  fprintf (outf, "#  ref: ");
  print_generic_stmt (outf, DR_REF (dr));
  fprintf (outf, "#  base_object: ");
  print_generic_stmt (outf, DR_BASE_OBJECT (dr));

  for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
    {
      fprintf (outf, "#  Access function %d: ", i);
      print_generic_stmt (outf, DR_ACCESS_FN (dr, i));
    }
  fprintf (outf, "#)\n");
}
/* Unified dump function for a DATA_REFERENCE structure.  */

DEBUG_FUNCTION void
debug (data_reference &ref)
{
  dump_data_reference (stderr, &ref);
}

DEBUG_FUNCTION void
debug (data_reference *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}
/* Dumps the affine function described by FN to the file OUTF.  */

static void
dump_affine_function (FILE *outf, affine_fn fn)
{
  unsigned i;
  tree coef;

  print_generic_expr (outf, fn[0], TDF_SLIM);
  for (i = 1; fn.iterate (i, &coef); i++)
    {
      fprintf (outf, " + ");
      print_generic_expr (outf, coef, TDF_SLIM);
      fprintf (outf, " * x_%u", i);
    }
}
/* Dumps the conflict function CF to the file OUTF.  */

static void
dump_conflict_function (FILE *outf, conflict_function *cf)
{
  unsigned i;

  if (cf->n == NO_DEPENDENCE)
    fprintf (outf, "no dependence");
  else if (cf->n == NOT_KNOWN)
    fprintf (outf, "not known");
  else
    {
      for (i = 0; i < cf->n; i++)
	{
	  if (i != 0)
	    fprintf (outf, " ");
	  fprintf (outf, "[");
	  dump_affine_function (outf, cf->fns[i]);
	  fprintf (outf, "]");
	}
    }
}
/* Dump function for a SUBSCRIPT structure.  */

static void
dump_subscript (FILE *outf, struct subscript *subscript)
{
  conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);

  fprintf (outf, "\n (subscript \n");
  fprintf (outf, "  iterations_that_access_an_element_twice_in_A: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  cf = SUB_CONFLICTS_IN_B (subscript);
  fprintf (outf, "\n  iterations_that_access_an_element_twice_in_B: ");
  dump_conflict_function (outf, cf);
  if (CF_NONTRIVIAL_P (cf))
    {
      tree last_iteration = SUB_LAST_CONFLICT (subscript);
      fprintf (outf, "\n  last_conflict: ");
      print_generic_expr (outf, last_iteration);
    }

  fprintf (outf, "\n  (Subscript distance: ");
  print_generic_expr (outf, SUB_DISTANCE (subscript));
  fprintf (outf, " ))\n");
}
/* Print the classic direction vector DIRV to OUTF.  */

static void
print_direction_vector (FILE *outf,
			lambda_vector dirv,
			int length)
{
  int eq;

  for (eq = 0; eq < length; eq++)
    {
      enum data_dependence_direction dir = ((enum data_dependence_direction)
					    dirv[eq]);

      switch (dir)
	{
	case dir_positive:
	  fprintf (outf, "    +");
	  break;
	case dir_negative:
	  fprintf (outf, "    -");
	  break;
	case dir_equal:
	  fprintf (outf, "    =");
	  break;
	case dir_positive_or_equal:
	  fprintf (outf, "   +=");
	  break;
	case dir_positive_or_negative:
	  fprintf (outf, "   +-");
	  break;
	case dir_negative_or_equal:
	  fprintf (outf, "   -=");
	  break;
	case dir_star:
	  fprintf (outf, "    *");
	  break;
	default:
	  fprintf (outf, "indep");
	  break;
	}
    }
  fprintf (outf, "\n");
}
/* Print a vector of direction vectors.  */

static void
print_dir_vectors (FILE *outf, vec<lambda_vector> dir_vects,
		   int length)
{
  for (lambda_vector v : dir_vects)
    print_direction_vector (outf, v, length);
}
/* Print out a vector VEC of length N to OUTFILE.  */

void
print_lambda_vector (FILE * outfile, lambda_vector vector, int n)
{
  int i;

  for (i = 0; i < n; i++)
    fprintf (outfile, HOST_WIDE_INT_PRINT_DEC " ", vector[i]);
  fprintf (outfile, "\n");
}
/* Print a vector of distance vectors.  */

static void
print_dist_vectors (FILE *outf, vec<lambda_vector> dist_vects,
		    int length)
{
  for (lambda_vector v : dist_vects)
    print_lambda_vector (outf, v, length);
}
/* Dump function for a DATA_DEPENDENCE_RELATION structure.  */

void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr)
{
  struct data_reference *dra, *drb;

  fprintf (outf, "(Data Dep: \n");

  if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (ddr)
	{
	  dra = DDR_A (ddr);
	  drb = DDR_B (ddr);
	  if (dra)
	    dump_data_reference (outf, dra);
	  else
	    fprintf (outf, "    (nil)\n");
	  if (drb)
	    dump_data_reference (outf, drb);
	  else
	    fprintf (outf, "    (nil)\n");
	}
      fprintf (outf, "    (don't know)\n)\n");
      return;
    }

  dra = DDR_A (ddr);
  drb = DDR_B (ddr);
  dump_data_reference (outf, dra);
  dump_data_reference (outf, drb);

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    fprintf (outf, "    (no dependence)\n");

  else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;
      class loop *loopi;

      subscript *sub;
      FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
	{
	  fprintf (outf, "  access_fn_A: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 0));
	  fprintf (outf, "  access_fn_B: ");
	  print_generic_stmt (outf, SUB_ACCESS_FN (sub, 1));
	  dump_subscript (outf, sub);
	}

      fprintf (outf, "  loop nest: (");
      FOR_EACH_VEC_ELT (DDR_LOOP_NEST (ddr), i, loopi)
	fprintf (outf, "%d ", loopi->num);
      fprintf (outf, ")\n");

      for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	{
	  fprintf (outf, "  distance_vector: ");
	  print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
			       DDR_NB_LOOPS (ddr));
	}

      for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
	{
	  fprintf (outf, "  direction_vector: ");
	  print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
				  DDR_NB_LOOPS (ddr));
	}
    }

  fprintf (outf, ")\n");
}
/* Debug version.  */

DEBUG_FUNCTION void
debug_data_dependence_relation (const struct data_dependence_relation *ddr)
{
  dump_data_dependence_relation (stderr, ddr);
}
/* Dump into FILE all the dependence relations from DDRS.  */

void
dump_data_dependence_relations (FILE *file, const vec<ddr_p> &ddrs)
{
  for (auto ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
}
DEBUG_FUNCTION void
debug (vec<ddr_p> &ref)
{
  dump_data_dependence_relations (stderr, ref);
}

DEBUG_FUNCTION void
debug (vec<ddr_p> *ptr)
{
  if (ptr)
    debug (*ptr);
  else
    fprintf (stderr, "<nil>\n");
}
/* Dump to STDERR all the dependence relations from DDRS.  */

DEBUG_FUNCTION void
debug_data_dependence_relations (vec<ddr_p> ddrs)
{
  dump_data_dependence_relations (stderr, ddrs);
}
/* Dumps the distance and direction vectors in FILE.  DDRS contains
   the dependence relations, and VECT_SIZE is the size of the
   dependence vectors, or in other words the number of loops in the
   considered nest.  */

static void
dump_dist_dir_vectors (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
      {
	for (lambda_vector v : DDR_DIST_VECTS (ddr))
	  {
	    fprintf (file, "DISTANCE_V (");
	    print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }

	for (lambda_vector v : DDR_DIR_VECTS (ddr))
	  {
	    fprintf (file, "DIRECTION_V (");
	    print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
	    fprintf (file, ")\n");
	  }
      }

  fprintf (file, "\n\n");
}
/* Dumps the data dependence relations DDRS in FILE.  */

static void
dump_ddrs (FILE *file, vec<ddr_p> ddrs)
{
  for (data_dependence_relation *ddr : ddrs)
    dump_data_dependence_relation (file, ddr);
  fprintf (file, "\n\n");
}
DEBUG_FUNCTION void
debug_ddrs (vec<ddr_p> ddrs)
{
  dump_ddrs (stderr, ddrs);
}
/* If RESULT_RANGE is nonnull, set *RESULT_RANGE to the range of
   OP0 CODE OP1, where:

   - OP0 CODE OP1 has integral type TYPE
   - the range of OP0 is given by OP0_RANGE and
   - the range of OP1 is given by OP1_RANGE.

   Independently of RESULT_RANGE, try to compute:

     DELTA = ((sizetype) OP0 CODE (sizetype) OP1)
	     - (sizetype) (OP0 CODE OP1)

   as a constant and subtract DELTA from the ssizetype constant in *OFF.
   Return true on success, or false if DELTA is not known at compile time.

   Truncation and sign changes are known to distribute over CODE, i.e.

     (itype) (A CODE B) == (itype) A CODE (itype) B

   for any integral type ITYPE whose precision is no greater than the
   precision of A and B.  */
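/* A worked instance of the DELTA computation above (an expository
   sketch, not from the original sources): for an 8-bit unsigned TYPE
   with OP0 == 200, OP1 == 100 and CODE == PLUS_EXPR, OP0 CODE OP1
   wraps to 44 while (sizetype) OP0 CODE (sizetype) OP1 is 300, so
   DELTA == 300 - 44 == 256, i.e. exactly one overflow of TYPE, and
   256 is subtracted from the constant in *OFF.  */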
static bool
compute_distributive_range (tree type, irange &op0_range,
			    tree_code code, irange &op1_range,
			    tree *off, irange *result_range)
{
  gcc_assert (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type));
  if (result_range)
    {
      range_op_handler op (code);
      if (!op.fold_range (*result_range, type, op0_range, op1_range))
	result_range->set_varying (type);
    }

  /* The distributive property guarantees that if TYPE is no narrower
     than SIZETYPE,

       (sizetype) (OP0 CODE OP1) == (sizetype) OP0 CODE (sizetype) OP1

     and so we can treat DELTA as zero.  */
  if (TYPE_PRECISION (type) >= TYPE_PRECISION (sizetype))
    return true;

  /* If overflow is undefined, we can assume that:

       X == (ssizetype) OP0 CODE (ssizetype) OP1

     is within the range of TYPE, i.e.:

       X == (ssizetype) (TYPE) X

     Distributing the (TYPE) truncation over X gives:

       X == (ssizetype) (OP0 CODE OP1)

     Casting both sides to sizetype and distributing the sizetype cast
     over X gives:

       (sizetype) OP0 CODE (sizetype) OP1 == (sizetype) (OP0 CODE OP1)

     and so we can treat DELTA as zero.  */
  if (TYPE_OVERFLOW_UNDEFINED (type))
    return true;

  /* Compute the range of:

       (ssizetype) OP0 CODE (ssizetype) OP1

     The distributive property guarantees that this has the same bitpattern as:

       (sizetype) OP0 CODE (sizetype) OP1

     but its range is more conducive to analysis.  */
  range_cast (op0_range, ssizetype);
  range_cast (op1_range, ssizetype);
  int_range_max wide_range;
  range_op_handler op (code);
  bool saved_flag_wrapv = flag_wrapv;
  flag_wrapv = 1;
  if (!op.fold_range (wide_range, ssizetype, op0_range, op1_range))
    wide_range.set_varying (ssizetype);
  flag_wrapv = saved_flag_wrapv;
  if (wide_range.num_pairs () != 1
      || wide_range.varying_p () || wide_range.undefined_p ())
    return false;

  wide_int lb = wide_range.lower_bound ();
  wide_int ub = wide_range.upper_bound ();

  /* Calculate the number of times that each end of the range overflows or
     underflows TYPE.  We can only calculate DELTA if the numbers match.  */
  unsigned int precision = TYPE_PRECISION (type);
  if (!TYPE_UNSIGNED (type))
    {
      wide_int type_min = wi::mask (precision - 1, true, lb.get_precision ());
      lb -= type_min;
      ub -= type_min;
    }
  wide_int upper_bits = wi::mask (precision, true, lb.get_precision ());
  lb &= upper_bits;
  ub &= upper_bits;
  if (lb != ub)
    return false;

  /* OP0 CODE OP1 overflows exactly arshift (LB, PRECISION) times, with
     negative values indicating underflow.  The low PRECISION bits of LB
     are clear, so DELTA is therefore LB (== UB).  */
  *off = wide_int_to_tree (ssizetype, wi::to_wide (*off) - lb);
  return true;
}
/* Return true if (sizetype) OP == (sizetype) (TO_TYPE) OP,
   given that OP has type FROM_TYPE and range RANGE.  Both TO_TYPE and
   FROM_TYPE are integral types.  */

static bool
nop_conversion_for_offset_p (tree to_type, tree from_type, irange &range)
{
  gcc_assert (INTEGRAL_TYPE_P (to_type)
	      && INTEGRAL_TYPE_P (from_type)
	      && !TYPE_OVERFLOW_TRAPS (to_type)
	      && !TYPE_OVERFLOW_TRAPS (from_type));

  /* Converting to something no narrower than sizetype and then to sizetype
     is equivalent to converting directly to sizetype.  */
  if (TYPE_PRECISION (to_type) >= TYPE_PRECISION (sizetype))
    return true;

  /* Check whether TO_TYPE can represent all values that FROM_TYPE can.  */
  if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type)
      && (TYPE_UNSIGNED (from_type) || !TYPE_UNSIGNED (to_type)))
    return true;

  /* For narrowing conversions, we could in principle test whether
     the bits in FROM_TYPE but not in TO_TYPE have a fixed value
     and apply a constant adjustment.

     For other conversions (which involve a sign change) we could
     check that the signs are always equal, and apply a constant
     adjustment if the signs are negative.

     However, both cases should be rare.  */
  return range_fits_type_p (&range, TYPE_PRECISION (to_type),
			    TYPE_SIGN (to_type));
}
static void
split_constant_offset (tree type, tree *var, tree *off,
		       irange *result_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit);
/* Helper function for split_constant_offset.  If TYPE is a pointer type,
   try to express OP0 CODE OP1 as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has type TYPE and
   - *OFF is a constant of type ssizetype.

   If TYPE is an integral type, try to express (sizetype) (OP0 CODE OP1) as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype and
   - *OFF is a constant of type ssizetype.

   In both cases, OP0 CODE OP1 has type TYPE.

   Return true on success.  A false return value indicates that we can't
   do better than set *OFF to zero.

   When returning true, set RESULT_RANGE to the range of OP0 CODE OP1,
   if RESULT_RANGE is nonnull and if we can do better than assume VR_VARYING.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */
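/* For example (an expository sketch, not from the original sources):
   for OP0 == i_1, CODE == PLUS_EXPR, OP1 == 4 and an integral TYPE,
   the split yields *VAR == (sizetype) i_1 and *OFF == 4: the constant
   summand is peeled off into the offset and only the variable part
   remains in *VAR.  */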
static bool
split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
			 tree *var, tree *off, irange *result_range,
			 hash_map<tree, std::pair<tree, tree> > &cache,
			 unsigned *limit)
{
  tree var0, var1;
  tree off0, off1;
  int_range_max op0_range, op1_range;

  *var = NULL_TREE;
  *off = NULL_TREE;

  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
    return false;

  if (TREE_CODE (op0) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op0))
    return false;
  if (op1
      && TREE_CODE (op1) == SSA_NAME
      && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op1))
    return false;

  switch (code)
    {
    case INTEGER_CST:
      *var = size_int (0);
      *off = fold_convert (ssizetype, op0);
      if (result_range)
	{
	  wide_int w = wi::to_wide (op0);
	  result_range->set (TREE_TYPE (op0), w, w);
	}
      return true;

    case POINTER_PLUS_EXPR:
      split_constant_offset (op0, &var0, &off0, nullptr, cache, limit);
      split_constant_offset (op1, &var1, &off1, nullptr, cache, limit);
      *var = fold_build2 (POINTER_PLUS_EXPR, type, var0, var1);
      *off = size_binop (PLUS_EXPR, off0, off1);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      split_constant_offset (op1, &var1, &off1, &op1_range, cache, limit);
      *off = size_binop (code, off0, off1);
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (code, sizetype, var0, var1);
      return true;

    case MULT_EXPR:
      if (TREE_CODE (op1) != INTEGER_CST)
	return false;

      split_constant_offset (op0, &var0, &off0, &op0_range, cache, limit);
      op1_range.set (TREE_TYPE (op1), wi::to_wide (op1), wi::to_wide (op1));
      *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
      if (!compute_distributive_range (type, op0_range, code, op1_range,
				       off, result_range))
	return false;
      *var = fold_build2 (MULT_EXPR, sizetype, var0,
			  fold_convert (sizetype, op1));
      return true;

    case ADDR_EXPR:
      {
	tree base, poffset;
	poly_int64 pbitsize, pbitpos, pbytepos;
	machine_mode pmode;
	int punsignedp, preversep, pvolatilep;

	op0 = TREE_OPERAND (op0, 0);
	base
	  = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset, &pmode,
				 &punsignedp, &preversep, &pvolatilep);

	if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
	  return false;
	base = build_fold_addr_expr (base);
	off0 = ssize_int (pbytepos);

	if (poffset)
	  {
	    split_constant_offset (poffset, &poffset, &off1, nullptr,
				   cache, limit);
	    off0 = size_binop (PLUS_EXPR, off0, off1);
	    base = fold_build_pointer_plus (base, poffset);
	  }

	var0 = fold_convert (type, base);

	/* If variable length types are involved, punt, otherwise casts
	   might be converted into ARRAY_REFs in gimplify_conversion.
	   To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
	   possibly no longer appears in current GIMPLE, might resurface.
	   This perhaps could run
	   if (CONVERT_EXPR_P (var0))
	     {
	       gimplify_conversion (&var0);
	       // Attempt to fill in any within var0 found ARRAY_REF's
	       // element size from corresponding op embedded ARRAY_REF,
	       // if unsuccessful, just punt.
	     }  */
	while (POINTER_TYPE_P (type))
	  type = TREE_TYPE (type);
	if (int_size_in_bytes (type) < 0)
	  return false;

	*var = var0;
	*off = off0;
	return true;
      }

    case SSA_NAME:
      {
	gimple *def_stmt = SSA_NAME_DEF_STMT (op0);
	enum tree_code subcode;

	if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	  return false;

	subcode = gimple_assign_rhs_code (def_stmt);

	/* We are using a cache to avoid un-CSEing large amounts of code.  */
	bool use_cache = false;
	if (!has_single_use (op0)
	    && (subcode == POINTER_PLUS_EXPR
		|| subcode == PLUS_EXPR
		|| subcode == MINUS_EXPR
		|| subcode == MULT_EXPR
		|| subcode == ADDR_EXPR
		|| CONVERT_EXPR_CODE_P (subcode)))
	  {
	    use_cache = true;
	    bool existed;
	    std::pair<tree, tree> &e = cache.get_or_insert (op0, &existed);
	    if (existed)
	      {
		if (integer_zerop (e.second))
		  return false;
		*var = e.first;
		*off = e.second;
		/* The caller sets the range in this case.  */
		return true;
	      }
	    e = std::make_pair (op0, ssize_int (0));
	  }

	if (*limit == 0)
	  return false;
	--*limit;

	var0 = gimple_assign_rhs1 (def_stmt);
	var1 = gimple_assign_rhs2 (def_stmt);

	bool res = split_constant_offset_1 (type, var0, subcode, var1,
					    var, off, nullptr, cache, limit);
	if (res && use_cache)
	  *cache.get (op0) = std::make_pair (*var, *off);
	/* The caller sets the range in this case.  */
	return res;
      }

    CASE_CONVERT:
      {
	/* We can only handle the following conversions:

	   - Conversions from one pointer type to another pointer type.

	   - Conversions from one non-trapping integral type to another
	     non-trapping integral type.  In this case, the recursive
	     call makes sure that:

	       (sizetype) OP0

	     can be expressed as a sizetype operation involving VAR and OFF,
	     and all we need to do is check whether:

	       (sizetype) OP0 == (sizetype) (TYPE) OP0

	   - Conversions from a non-trapping sizetype-size integral type to
	     a like-sized pointer type.  In this case, the recursive call
	     makes sure that:

	       (sizetype) OP0 == *VAR + (sizetype) *OFF

	     and we can convert that to:

	       POINTER_PLUS <(TYPE) *VAR, (sizetype) *OFF>

	   - Conversions from a sizetype-sized pointer type to a like-sized
	     non-trapping integral type.  In this case, the recursive call
	     makes sure that:

	       OP0 == POINTER_PLUS <*VAR, (sizetype) *OFF>

	     where the POINTER_PLUS and *VAR have the same precision as
	     TYPE (and the same precision as sizetype).  Then:

	       (sizetype) (TYPE) OP0 == (sizetype) *VAR + (sizetype) *OFF.  */
	tree itype = TREE_TYPE (op0);
	if ((POINTER_TYPE_P (itype)
	     || (INTEGRAL_TYPE_P (itype) && !TYPE_OVERFLOW_TRAPS (itype)))
	    && (POINTER_TYPE_P (type)
		|| (INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)))
	    && (POINTER_TYPE_P (type) == POINTER_TYPE_P (itype)
		|| (TYPE_PRECISION (type) == TYPE_PRECISION (sizetype)
		    && TYPE_PRECISION (itype) == TYPE_PRECISION (sizetype))))
	  {
	    if (POINTER_TYPE_P (type))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (type, *var);
	      }
	    else if (POINTER_TYPE_P (itype))
	      {
		split_constant_offset (op0, var, off, nullptr, cache, limit);
		*var = fold_convert (sizetype, *var);
	      }
	    else
	      {
		split_constant_offset (op0, var, off, &op0_range,
				       cache, limit);
		if (!nop_conversion_for_offset_p (type, itype, op0_range))
		  return false;
		if (result_range)
		  {
		    *result_range = op0_range;
		    range_cast (*result_range, type);
		  }
	      }
	    return true;
	  }
	return false;
      }

    default:
      return false;
    }
}
/* If EXP has pointer type, try to express it as:

     POINTER_PLUS <*VAR, (sizetype) *OFF>

   where:

   - *VAR has the same type as EXP and
   - *OFF is a constant of type ssizetype.

   If EXP has an integral type, try to express (sizetype) EXP as:

     *VAR + (sizetype) *OFF

   where:

   - *VAR has type sizetype and
   - *OFF is a constant of type ssizetype.

   If EXP_RANGE is nonnull, set it to the range of EXP.

   CACHE caches {*VAR, *OFF} pairs for SSA names that we've previously
   visited.  LIMIT counts down the number of SSA names that we are
   allowed to process before giving up.  */
static void
split_constant_offset (tree exp, tree *var, tree *off, irange *exp_range,
		       hash_map<tree, std::pair<tree, tree> > &cache,
		       unsigned *limit)
{
  tree type = TREE_TYPE (exp), op0, op1;
  enum tree_code code;

  code = TREE_CODE (exp);
  if (exp_range)
    {
      exp_range->set_varying (type);
      if (code == SSA_NAME)
	{
	  value_range vr;
	  get_range_query (cfun)->range_of_expr (vr, exp);
	  if (vr.undefined_p ())
	    vr.set_varying (TREE_TYPE (exp));
	  tree vr_min, vr_max;
	  value_range_kind vr_kind = get_legacy_range (vr, vr_min, vr_max);
	  wide_int var_min = wi::to_wide (vr_min);
	  wide_int var_max = wi::to_wide (vr_max);
	  wide_int var_nonzero = get_nonzero_bits (exp);
	  vr_kind = intersect_range_with_nonzero_bits (vr_kind,
						       &var_min, &var_max,
						       var_nonzero,
						       TYPE_SIGN (type));
	  /* This check for VR_VARYING is here because the old code
	     using get_range_info would return VR_RANGE for the entire
	     domain, instead of VR_VARYING.  The new code normalizes
	     full-domain ranges to VR_VARYING.  */
	  if (vr_kind == VR_RANGE || vr_kind == VR_VARYING)
	    exp_range->set (type, var_min, var_max);
	}
    }

  if (!tree_is_chrec (exp)
      && get_gimple_rhs_class (TREE_CODE (exp)) != GIMPLE_TERNARY_RHS)
    {
      extract_ops_from_tree (exp, &code, &op0, &op1);
      if (split_constant_offset_1 (type, op0, code, op1, var, off,
				   exp_range, cache, limit))
	return;
    }

  *var = exp;
  if (INTEGRAL_TYPE_P (type))
    *var = fold_convert (sizetype, *var);
  *off = ssize_int (0);

  value_range r;
  if (exp_range && code != SSA_NAME
      && get_range_query (cfun)->range_of_expr (r, exp)
      && !r.undefined_p ())
    *exp_range = r;
}
/* Expresses EXP as VAR + OFF, where OFF is a constant.  VAR has the same
   type as EXP while OFF has type ssizetype.  */

void
split_constant_offset (tree exp, tree *var, tree *off)
{
  unsigned limit = param_ssa_name_def_chain_limit;
  static hash_map<tree, std::pair<tree, tree> > *cache;
  if (!cache)
    cache = new hash_map<tree, std::pair<tree, tree> > (37);
  split_constant_offset (exp, var, off, nullptr, *cache, &limit);
  *var = fold_convert (TREE_TYPE (exp), *var);
  cache->empty ();
}
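/* For instance (an expository sketch, not from the original sources):
   for a pointer expression EXP == p_1 p+ 16 this returns *VAR == p_1
   and *OFF == 16, and for EXP == &a[i_2 + 7] with int a[] the constant
   part becomes *OFF == 28 while the variable i_2 term stays folded
   inside *VAR.  */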
/* Returns the address ADDR of an object in a canonical shape (without nop
   casts, and with type of pointer to the object).  */

static tree
canonicalize_base_object_address (tree addr)
{
  tree orig = addr;

  STRIP_NOPS (addr);

  /* The base address may be obtained by casting from integer, in that case
     keep the integer.  */
  if (!POINTER_TYPE_P (TREE_TYPE (addr)))
    return orig;

  if (TREE_CODE (addr) != ADDR_EXPR)
    return addr;

  return build_fold_addr_expr (TREE_OPERAND (addr, 0));
}
/* Analyze the behavior of memory reference REF within STMT.
   There are two modes:

   - BB analysis.  In this case we simply split the address into base,
     init and offset components, without reference to any containing loop.
     The resulting base and offset are general expressions and they can
     vary arbitrarily from one iteration of the containing loop to the next.
     The step is always zero.

   - loop analysis.  In this case we analyze the reference both wrt LOOP
     and on the basis that the reference occurs (is "used") in LOOP;
     see the comment above analyze_scalar_evolution_in_loop for more
     information about this distinction.  The base, init, offset and
     step fields are all invariant in LOOP.

   Perform BB analysis if LOOP is null, or if LOOP is the function's
   dummy outermost loop.  In other cases perform loop analysis.

   Return true if the analysis succeeded and store the results in DRB if so.
   BB analysis can only fail for bitfield or reversed-storage accesses.  */
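/* As an illustration (an expository sketch, not from the original
   sources): for the reference a[i] in a loop where the int array a is
   indexed by an induction variable i running from 0 in steps of 1,
   loop analysis would roughly yield base_address == &a, offset == 0,
   init == 0 and step == 4, while BB analysis of the same reference
   folds the variable i * 4 term into the offset component and always
   reports a zero step.  */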
opt_result
dr_analyze_innermost (innermost_loop_behavior *drb, tree ref,
		      class loop *loop, const gimple *stmt)
{
  poly_int64 pbitsize, pbitpos;
  tree base, poffset;
  machine_mode pmode;
  int punsignedp, preversep, pvolatilep;
  affine_iv base_iv, offset_iv;
  tree init, dinit, step;
  bool in_loop = (loop && loop->num);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "analyze_innermost: ");

  base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset, &pmode,
			      &punsignedp, &preversep, &pvolatilep);
  gcc_assert (base != NULL_TREE);

  poly_int64 pbytepos;
  if (!multiple_p (pbitpos, BITS_PER_UNIT, &pbytepos))
    return opt_result::failure_at (stmt,
				   "failed: bit offset alignment.\n");

  if (preversep)
    return opt_result::failure_at (stmt,
				   "failed: reverse storage order.\n");

  /* Calculate the alignment and misalignment for the inner reference.  */
  unsigned HOST_WIDE_INT bit_base_misalignment;
  unsigned int bit_base_alignment;
  get_object_alignment_1 (base, &bit_base_alignment, &bit_base_misalignment);

  /* There are no bitfield references remaining in BASE, so the values
     we got back must be whole bytes.  */
  gcc_assert (bit_base_alignment % BITS_PER_UNIT == 0
	      && bit_base_misalignment % BITS_PER_UNIT == 0);
  unsigned int base_alignment = bit_base_alignment / BITS_PER_UNIT;
  poly_int64 base_misalignment = bit_base_misalignment / BITS_PER_UNIT;

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  /* Subtract MOFF from the base and add it to POFFSET instead.
	     Adjust the misalignment to reflect the amount we subtracted.  */
	  poly_offset_int moff = mem_ref_offset (base);
	  base_misalignment -= moff.force_shwi ();
	  tree mofft = wide_int_to_tree (sizetype, moff);
	  if (!poffset)
	    poffset = mofft;
	  else
	    poffset = size_binop (PLUS_EXPR, poffset, mofft);
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    {
      if (may_be_nonaddressable_p (base))
	return opt_result::failure_at (stmt,
				       "failed: base not addressable.\n");
      base = build_fold_addr_expr (base);
    }

  if (in_loop)
    {
      if (!simple_iv (loop, loop, base, &base_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of base is not affine.\n");
    }
  else
    {
      base_iv.base = base;
      base_iv.step = ssize_int (0);
      base_iv.no_overflow = true;
    }

  if (!poffset)
    {
      offset_iv.base = ssize_int (0);
      offset_iv.step = ssize_int (0);
    }
  else
    {
      if (!in_loop)
	{
	  offset_iv.base = poffset;
	  offset_iv.step = ssize_int (0);
	}
      else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
	return opt_result::failure_at
	  (stmt, "failed: evolution of offset is not affine.\n");
    }

  init = ssize_int (pbytepos);

  /* Subtract any constant component from the base and add it to INIT instead.
     Adjust the misalignment to reflect the amount we subtracted.  */
  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);
  base_misalignment -= TREE_INT_CST_LOW (dinit);

  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
  init = size_binop (PLUS_EXPR, init, dinit);

  step = size_binop (PLUS_EXPR,
		     fold_convert (ssizetype, base_iv.step),
		     fold_convert (ssizetype, offset_iv.step));

  base = canonicalize_base_object_address (base_iv.base);

  /* See if get_pointer_alignment can guarantee a higher alignment than
     the one we calculated above.  */
  unsigned HOST_WIDE_INT alt_misalignment;
  unsigned int alt_alignment;
  get_pointer_alignment_1 (base, &alt_alignment, &alt_misalignment);

  /* As above, these values must be whole bytes.  */
  gcc_assert (alt_alignment % BITS_PER_UNIT == 0
	      && alt_misalignment % BITS_PER_UNIT == 0);
  alt_alignment /= BITS_PER_UNIT;
  alt_misalignment /= BITS_PER_UNIT;

  if (base_alignment < alt_alignment)
    {
      base_alignment = alt_alignment;
      base_misalignment = alt_misalignment;
    }

  drb->base_address = base;
  drb->offset = fold_convert (ssizetype, offset_iv.base);
  drb->init = init;
  drb->step = step;
  if (known_misalignment (base_misalignment, base_alignment,
			  &drb->base_misalignment))
    drb->base_alignment = base_alignment;
  else
    {
      drb->base_alignment = known_alignment (base_misalignment);
      drb->base_misalignment = 0;
    }
  drb->offset_alignment = highest_pow2_factor (offset_iv.base);
  drb->step_alignment = highest_pow2_factor (step);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "success.\n");

  return opt_result::success ();
}
/* Return true if OP is a valid component reference for a DR access
   function.  This accepts a subset of what handled_component_p accepts.  */

static bool
access_fn_component_p (tree op)
{
  switch (TREE_CODE (op))
    {
    case REALPART_EXPR:
    case IMAGPART_EXPR:
    case ARRAY_REF:
      return true;

    case COMPONENT_REF:
      return TREE_CODE (TREE_TYPE (TREE_OPERAND (op, 0))) == RECORD_TYPE;

    default:
      return false;
    }
}
/* Returns whether BASE can have a access_fn_component_p with BASE
   as base.  */

static bool
base_supports_access_fn_components_p (tree base)
{
  switch (TREE_CODE (TREE_TYPE (base)))
    {
    case COMPLEX_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
      return true;
    default:
      return false;
    }
}
/* Determines the base object and the list of indices of memory reference
   DR, analyzed in LOOP and instantiated before NEST.  */
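/* For example (an expository sketch, not from the original sources):
   for a[i][j].f1 analyzed in a nest over i and j, the recorded base
   object is a and the access functions include one chrec per array
   dimension, e.g. {i_0, +, 1}_1 and {j_0, +, 1}_2, plus the constant
   field offset of f1 contributed by the COMPONENT_REF, which is what
   lets a[i].f1 and a[i].f2 be disambiguated.  */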
static void
dr_analyze_indices (struct indices *dri, tree ref, edge nest, loop_p loop)
{
  /* If analyzing a basic-block there are no indices to analyze
     and thus no access functions.  */
  if (!nest)
    {
      dri->base_object = ref;
      dri->access_fns.create (0);
      return;
    }

  vec<tree> access_fns = vNULL;

  /* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
     into a two element array with a constant index.  The base is
     then just the immediate underlying object.  */
  if (TREE_CODE (ref) == REALPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_zero_node);
    }
  else if (TREE_CODE (ref) == IMAGPART_EXPR)
    {
      ref = TREE_OPERAND (ref, 0);
      access_fns.safe_push (integer_one_node);
    }

  /* Analyze access functions of dimensions we know to be independent.
     The list of component references handled here should be kept in
     sync with access_fn_component_p.  */
  while (handled_component_p (ref))
    {
      if (TREE_CODE (ref) == ARRAY_REF)
	{
	  tree op = TREE_OPERAND (ref, 1);
	  tree access_fn = analyze_scalar_evolution (loop, op);
	  access_fn = instantiate_scev (nest, loop, access_fn);
	  access_fns.safe_push (access_fn);
	}
      else if (TREE_CODE (ref) == COMPONENT_REF
	       && TREE_CODE (TREE_TYPE (TREE_OPERAND (ref, 0))) == RECORD_TYPE)
	{
	  /* For COMPONENT_REFs of records (but not unions!) use the
	     FIELD_DECL offset as constant access function so we can
	     disambiguate a[i].f1 and a[i].f2.  */
	  tree off = component_ref_field_offset (ref);
	  off = size_binop (PLUS_EXPR,
			    size_binop (MULT_EXPR,
					fold_convert (bitsizetype, off),
					bitsize_int (BITS_PER_UNIT)),
			    DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1)));
	  access_fns.safe_push (off);
	}
      else
	/* If we have an unhandled component we could not translate
	   to an access function stop analyzing.  We have determined
	   our base object in this case.  */
	break;

      ref = TREE_OPERAND (ref, 0);
    }

  /* If the address operand of a MEM_REF base has an evolution in the
     analyzed nest, add it as an additional independent access-function.  */
  if (TREE_CODE (ref) == MEM_REF)
    {
      tree op = TREE_OPERAND (ref, 0);
      tree access_fn = analyze_scalar_evolution (loop, op);
      access_fn = instantiate_scev (nest, loop, access_fn);
      STRIP_NOPS (access_fn);
      if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
	{
	  tree memoff = TREE_OPERAND (ref, 1);
	  tree base = initial_condition (access_fn);
	  tree orig_type = TREE_TYPE (base);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  tree off;
	  split_constant_offset (base, &base, &off);
	  STRIP_USELESS_TYPE_CONVERSION (base);
	  /* Fold the MEM_REF offset into the evolutions initial
	     value to make more bases comparable.  */
	  if (!integer_zerop (memoff))
	    {
	      off = size_binop (PLUS_EXPR, off,
				fold_convert (ssizetype, memoff));
	      memoff = build_int_cst (TREE_TYPE (memoff), 0);
	    }
	  /* Adjust the offset so it is a multiple of the access type
	     size and thus we separate bases that can possibly be used
	     to produce partial overlaps (which the access_fn machinery
	     cannot handle).  */
	  wide_int rem;
	  if (TYPE_SIZE_UNIT (TREE_TYPE (ref))
	      && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (ref))) == INTEGER_CST
	      && !integer_zerop (TYPE_SIZE_UNIT (TREE_TYPE (ref))))
	    rem = wi::mod_trunc
	      (wi::to_wide (off),
	       wi::to_wide (TYPE_SIZE_UNIT (TREE_TYPE (ref))),
	       SIGNED);
	  else
	    /* If we can't compute the remainder simply force the initial
	       condition to zero.  */
	    rem = wi::to_wide (off);
	  off = wide_int_to_tree (ssizetype, wi::to_wide (off) - rem);
	  memoff = wide_int_to_tree (TREE_TYPE (memoff), rem);
	  /* And finally replace the initial condition.  */
	  access_fn = chrec_replace_initial_condition
	      (access_fn, fold_convert (orig_type, off));
	  /* ???  This is still not a suitable base object for
	     dr_may_alias_p - the base object needs to be an
	     access that covers the object as whole.  With
	     an evolution in the pointer this cannot be
	     guaranteed.
	     As a band-aid, mark the access so we can special-case
	     it in dr_may_alias_p.  */
	  tree old = ref;
	  ref = fold_build2_loc (EXPR_LOCATION (ref),
				 MEM_REF, TREE_TYPE (ref),
				 base, memoff);
	  MR_DEPENDENCE_CLIQUE (ref) = MR_DEPENDENCE_CLIQUE (old);
	  MR_DEPENDENCE_BASE (ref) = MR_DEPENDENCE_BASE (old);
	  dri->unconstrained_base = true;
	  access_fns.safe_push (access_fn);
	}
    }
  else if (DECL_P (ref))
    {
      /* Canonicalize DR_BASE_OBJECT to MEM_REF form.  */
      ref = build2 (MEM_REF, TREE_TYPE (ref),
		    build_fold_addr_expr (ref),
		    build_int_cst (reference_alias_ptr_type (ref), 0));
    }

  dri->base_object = ref;
  dri->access_fns = access_fns;
}
/* Extracts the alias analysis information from the memory reference DR.  */

static void
dr_analyze_alias (struct data_reference *dr)
{
  tree ref = DR_REF (dr);
  tree base = get_base_address (ref), addr;

  if (INDIRECT_REF_P (base)
      || TREE_CODE (base) == MEM_REF)
    {
      addr = TREE_OPERAND (base, 0);
      if (TREE_CODE (addr) == SSA_NAME)
	DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
    }
}
/* Frees data reference DR.  */

void
free_data_ref (data_reference_p dr)
{
  DR_ACCESS_FNS (dr).release ();
  if (dr->alt_indices.base_object)
    dr->alt_indices.access_fns.release ();
  free (dr);
}
/* Analyze memory reference MEMREF, which is accessed in STMT.
   The reference is a read if IS_READ is true, otherwise it is a write.
   IS_CONDITIONAL_IN_STMT indicates that the reference is conditional
   within STMT, i.e. that it might not occur even if STMT is executed
   and runs to completion.

   Return the data_reference description of MEMREF.  NEST is the outermost
   loop in which the reference should be instantiated, LOOP is the loop
   in which the data reference should be analyzed.  */
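/* A typical use (an expository sketch, not from the original sources):
   a pass that found a load REF in STMT inside LOOP might call

     data_reference *dr
       = create_data_ref (loop_preheader_edge (loop), loop, ref, stmt,
			  true, false);

   and release it with free_data_ref (dr) when it is done.  */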
struct data_reference *
create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
		 bool is_read, bool is_conditional_in_stmt)
{
  struct data_reference *dr;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Creating dr for ");
      print_generic_expr (dump_file, memref, TDF_SLIM);
      fprintf (dump_file, "\n");
    }

  dr = XCNEW (struct data_reference);
  DR_STMT (dr) = stmt;
  DR_REF (dr) = memref;
  DR_IS_READ (dr) = is_read;
  DR_IS_CONDITIONAL_IN_STMT (dr) = is_conditional_in_stmt;

  dr_analyze_innermost (&DR_INNERMOST (dr), memref,
			nest != NULL ? loop : NULL, stmt);
  dr_analyze_indices (&dr->indices, DR_REF (dr), nest, loop);
  dr_analyze_alias (dr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;
      fprintf (dump_file, "\tbase_address: ");
      print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
      fprintf (dump_file, "\n\toffset from base address: ");
      print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tconstant offset from base address: ");
      print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tstep: ");
      print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
      fprintf (dump_file, "\n\tbase alignment: %d", DR_BASE_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase misalignment: %d",
	       DR_BASE_MISALIGNMENT (dr));
      fprintf (dump_file, "\n\toffset alignment: %d",
	       DR_OFFSET_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tstep alignment: %d", DR_STEP_ALIGNMENT (dr));
      fprintf (dump_file, "\n\tbase_object: ");
      print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
      fprintf (dump_file, "\n");
      for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
	{
	  fprintf (dump_file, "\tAccess function %d: ", i);
	  print_generic_stmt (dump_file, DR_ACCESS_FN (dr, i), TDF_SLIM);
	}
    }

  return dr;
}
/* A helper function computes order between two tree expressions T1 and T2.
   This is used in comparator functions sorting objects based on the order
   of tree expressions.  The function returns -1, 0, or 1.  */

int
data_ref_compare_tree (tree t1, tree t2)
{
  int i, cmp;
  enum tree_code code;
  enum tree_code_class tclass;

  if (t1 == t2)
    return 0;
  if (t1 == NULL)
    return -1;
  if (t2 == NULL)
    return 1;

  STRIP_USELESS_TYPE_CONVERSION (t1);
  STRIP_USELESS_TYPE_CONVERSION (t2);
  if (t1 == t2)
    return 0;

  if (TREE_CODE (t1) != TREE_CODE (t2)
      && ! (CONVERT_EXPR_P (t1) && CONVERT_EXPR_P (t2)))
    return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;

  code = TREE_CODE (t1);
  switch (code)
    {
    case INTEGER_CST:
      return tree_int_cst_compare (t1, t2);

    case STRING_CST:
      if (TREE_STRING_LENGTH (t1) != TREE_STRING_LENGTH (t2))
	return TREE_STRING_LENGTH (t1) < TREE_STRING_LENGTH (t2) ? -1 : 1;
      return memcmp (TREE_STRING_POINTER (t1), TREE_STRING_POINTER (t2),
		     TREE_STRING_LENGTH (t1));

    case SSA_NAME:
      if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
	return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
      break;

    default:
      if (POLY_INT_CST_P (t1))
	return compare_sizes_for_sort (wi::to_poly_widest (t1),
				       wi::to_poly_widest (t2));

      tclass = TREE_CODE_CLASS (code);

      /* For decls, compare their UIDs.  */
      if (tclass == tcc_declaration)
	{
	  if (DECL_UID (t1) != DECL_UID (t2))
	    return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
	  break;
	}
      /* For expressions, compare their operands recursively.  */
      else if (IS_EXPR_CODE_CLASS (tclass))
	{
	  for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
	    {
	      cmp = data_ref_compare_tree (TREE_OPERAND (t1, i),
					   TREE_OPERAND (t2, i));
	      if (cmp != 0)
		return cmp;
	    }
	}
      else
	gcc_unreachable ();
      break;
    }

  return 0;
}
/* Return TRUE if it's possible to resolve data dependence DDR by runtime
   alias check.  */

opt_result
runtime_alias_check_p (ddr_p ddr, class loop *loop, bool speed_p)
{
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
		 "consider run-time aliasing test between %T and %T\n",
		 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));

  if (!speed_p)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported when"
				   " optimizing for size.\n");

  /* FORNOW: We don't support versioning with outer-loop in either
     vectorization or loop distribution.  */
  if (loop != NULL && loop->inner != NULL)
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check not supported for"
				   " outer loop.\n");

  /* FORNOW: We don't support handling different address spaces.  */
  if (TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_A (ddr)))))
      != TYPE_ADDR_SPACE (TREE_TYPE (TREE_TYPE (DR_BASE_ADDRESS (DDR_B (ddr))))))
    return opt_result::failure_at (DR_STMT (DDR_A (ddr)),
				   "runtime alias check between different "
				   "address spaces not supported.\n");

  return opt_result::success ();
}
/* Operator == between two dr_with_seg_len objects.

   This equality operator is used to make sure two data refs
   are the same one so that we will consider to combine the
   aliasing checks of those two pairs of data dependent data
   refs.  */

static bool
operator == (const dr_with_seg_len& d1,
	     const dr_with_seg_len& d2)
{
  return (operand_equal_p (DR_BASE_ADDRESS (d1.dr),
			   DR_BASE_ADDRESS (d2.dr), 0)
	  && data_ref_compare_tree (DR_OFFSET (d1.dr), DR_OFFSET (d2.dr)) == 0
	  && data_ref_compare_tree (DR_INIT (d1.dr), DR_INIT (d2.dr)) == 0
	  && data_ref_compare_tree (d1.seg_len, d2.seg_len) == 0
	  && known_eq (d1.access_size, d2.access_size)
	  && d1.align == d2.align);
}
/* Comparison function for sorting objects of dr_with_seg_len_pair_t
   so that we can combine aliasing checks in one scan.  */

static int
comp_dr_with_seg_len_pair (const void *pa_, const void *pb_)
{
  const dr_with_seg_len_pair_t * pa = (const dr_with_seg_len_pair_t *) pa_;
  const dr_with_seg_len_pair_t * pb = (const dr_with_seg_len_pair_t *) pb_;
  const dr_with_seg_len &a1 = pa->first, &a2 = pa->second;
  const dr_with_seg_len &b1 = pb->first, &b2 = pb->second;

  /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
     if a and c have the same basic address and step, and b and d have the same
     address and step.  Therefore, if any a&c or b&d don't have the same address
     and step, we don't care the order of those two pairs after sorting.  */
  int comp_res;

  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a1.dr),
					 DR_BASE_ADDRESS (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (a2.dr),
					 DR_BASE_ADDRESS (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a1.dr),
					 DR_STEP (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_STEP (a2.dr),
					 DR_STEP (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a1.dr),
					 DR_OFFSET (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a1.dr),
					 DR_INIT (b1.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_OFFSET (a2.dr),
					 DR_OFFSET (b2.dr))) != 0)
    return comp_res;
  if ((comp_res = data_ref_compare_tree (DR_INIT (a2.dr),
					 DR_INIT (b2.dr))) != 0)
    return comp_res;

  return 0;
}
/* Dump information about ALIAS_PAIR, indenting each line by INDENT.  */

static void
dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent)
{
  dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent,
	       DR_REF (alias_pair->first.dr),
	       DR_REF (alias_pair->second.dr));

  dump_printf (MSG_NOTE, "%ssegment length: %T", indent,
	       alias_pair->first.seg_len);
  if (!operand_equal_p (alias_pair->first.seg_len,
			alias_pair->second.seg_len, 0))
    dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len);

  dump_printf (MSG_NOTE, "\n%saccess size: ", indent);
  dump_dec (MSG_NOTE, alias_pair->first.access_size);
  if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size))
    {
      dump_printf (MSG_NOTE, " vs. ");
      dump_dec (MSG_NOTE, alias_pair->second.access_size);
    }

  dump_printf (MSG_NOTE, "\n%salignment: %d", indent,
	       alias_pair->first.align);
  if (alias_pair->first.align != alias_pair->second.align)
    dump_printf (MSG_NOTE, " vs. %d", alias_pair->second.align);

  dump_printf (MSG_NOTE, "\n%sflags: ", indent);
  if (alias_pair->flags & DR_ALIAS_RAW)
    dump_printf (MSG_NOTE, " RAW");
  if (alias_pair->flags & DR_ALIAS_WAR)
    dump_printf (MSG_NOTE, " WAR");
  if (alias_pair->flags & DR_ALIAS_WAW)
    dump_printf (MSG_NOTE, " WAW");
  if (alias_pair->flags & DR_ALIAS_ARBITRARY)
    dump_printf (MSG_NOTE, " ARBITRARY");
  if (alias_pair->flags & DR_ALIAS_SWAPPED)
    dump_printf (MSG_NOTE, " SWAPPED");
  if (alias_pair->flags & DR_ALIAS_UNSWAPPED)
    dump_printf (MSG_NOTE, " UNSWAPPED");
  if (alias_pair->flags & DR_ALIAS_MIXED_STEPS)
    dump_printf (MSG_NOTE, " MIXED_STEPS");
  if (alias_pair->flags == 0)
    dump_printf (MSG_NOTE, " <none>");
  dump_printf (MSG_NOTE, "\n");
}
/* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones.
   FACTOR is number of iterations that each data reference is accessed.

   Basically, for each pair of dependent data refs store_ptr_0 & load_ptr_0,
   we create an expression:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))

   for aliasing checks.  However, in some cases we can decrease the number
   of checks by combining two checks into one.  For example, suppose we have
   another pair of data refs store_ptr_0 & load_ptr_1, and if the following
   condition is satisfied:

   load_ptr_0 < load_ptr_1  &&
   load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0

   (this condition means, in each iteration of vectorized loop, the accessed
   memory of store_ptr_0 cannot be between the memory of load_ptr_0 and
   load_ptr_1.)

   we then can use only the following expression to finish the aliasing checks
   between store_ptr_0 & load_ptr_0 and store_ptr_0 & load_ptr_1:

   ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
   || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))

   Note that we only consider that load_ptr_0 and load_ptr_1 have the same
   basic address.  */
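/* A concrete instance of the merge (an expository sketch, not from
   the original sources): with load_ptr_0 == p, load_ptr_1 == p + 4,
   load segment lengths of 16 bytes and a store segment length of at
   least 20 bytes, the condition above holds (4 - 16 < 20), so the two
   checks against p and p + 4 collapse into one check against the
   combined segment starting at p.  */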
void
prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
			       poly_uint64)
{
  if (alias_pairs->is_empty ())
    return;

  /* Canonicalize each pair so that the base components are ordered wrt
     data_ref_compare_tree.  This allows the loop below to merge more
     cases.  */
  unsigned int i;
  dr_with_seg_len_pair_t *alias_pair;
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      data_reference_p dr_a = alias_pair->first.dr;
      data_reference_p dr_b = alias_pair->second.dr;
      int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
					    DR_BASE_ADDRESS (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b));
      if (comp_res > 0)
	{
	  std::swap (alias_pair->first, alias_pair->second);
	  alias_pair->flags |= DR_ALIAS_SWAPPED;
	}
      else
	alias_pair->flags |= DR_ALIAS_UNSWAPPED;
    }

  /* Sort the collected data ref pairs so that we can scan them once to
     combine all possible aliasing checks.  */
  alias_pairs->qsort (comp_dr_with_seg_len_pair);

  /* Scan the sorted dr pairs and check if we can combine alias checks
     of two neighboring dr pairs.  */
  unsigned int last = 0;
  for (i = 1; i < alias_pairs->length (); ++i)
    {
      /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2).  */
      dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last];
      dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i];

      dr_with_seg_len *dr_a1 = &alias_pair1->first;
      dr_with_seg_len *dr_b1 = &alias_pair1->second;
      dr_with_seg_len *dr_a2 = &alias_pair2->first;
      dr_with_seg_len *dr_b2 = &alias_pair2->second;

      /* Remove duplicate data ref pairs.  */
      if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  continue;
	}

      /* Assume that we won't be able to merge the pairs, then correct
	 if we do.  */
      last += 1;
      if (last != i)
	(*alias_pairs)[last] = (*alias_pairs)[i];

      if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
	{
	  /* We consider the case that DR_B1 and DR_B2 are same memrefs,
	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
	  if (*dr_a1 == *dr_a2)
	    {
	      std::swap (dr_a1, dr_b1);
	      std::swap (dr_a2, dr_b2);
	    }

	  poly_int64 init_a1, init_a2;
	  /* Only consider cases in which the distance between the initial
	     DR_A1 and the initial DR_A2 is known at compile time.  */
	  if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
				DR_BASE_ADDRESS (dr_a2->dr), 0)
	      || !operand_equal_p (DR_OFFSET (dr_a1->dr),
				   DR_OFFSET (dr_a2->dr), 0)
	      || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
	      || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
	    continue;

	  /* Don't combine if we can't tell which one comes first.  */
	  if (!ordered_p (init_a1, init_a2))
	    continue;

	  /* Work out what the segment length would be if we did combine
	     DR_A1 and DR_A2:

	     - If DR_A1 and DR_A2 have equal lengths, that length is
	       also the combined length.

	     - If DR_A1 and DR_A2 both have negative "lengths", the combined
	       length is the lower bound on those lengths.

	     - If DR_A1 and DR_A2 both have positive lengths, the combined
	       length is the upper bound on those lengths.

	     Other cases are unlikely to give a useful combination.

	     The lengths both have sizetype, so the sign is taken from
	     the step instead.  */
	  poly_uint64 new_seg_len = 0;
	  bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len,
						 dr_a2->seg_len, 0);
	  if (new_seg_len_p)
	    {
	      poly_uint64 seg_len_a1, seg_len_a2;
	      if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
		  || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
		continue;

	      tree indicator_a = dr_direction_indicator (dr_a1->dr);
	      if (TREE_CODE (indicator_a) != INTEGER_CST)
		continue;

	      tree indicator_b = dr_direction_indicator (dr_a2->dr);
	      if (TREE_CODE (indicator_b) != INTEGER_CST)
		continue;

	      int sign_a = tree_int_cst_sgn (indicator_a);
	      int sign_b = tree_int_cst_sgn (indicator_b);

	      if (sign_a <= 0 && sign_b <= 0)
		new_seg_len = lower_bound (seg_len_a1, seg_len_a2);
	      else if (sign_a >= 0 && sign_b >= 0)
		new_seg_len = upper_bound (seg_len_a1, seg_len_a2);
	      else
		continue;
	    }
	  /* At this point we're committed to merging the refs.  */

	  /* Make sure dr_a1 starts left of dr_a2.  */
	  if (maybe_gt (init_a1, init_a2))
	    {
	      std::swap (*dr_a1, *dr_a2);
	      std::swap (init_a1, init_a2);
	    }

	  /* The DR_Bs are equal, so only the DR_As can introduce
	     mixed steps.  */
	  if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0))
	    alias_pair1->flags |= DR_ALIAS_MIXED_STEPS;

	  if (new_seg_len_p)
	    {
	      dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len),
					      new_seg_len);
	      dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len));
	    }

	  /* This is always positive due to the swap above.  */
	  poly_uint64 diff = init_a2 - init_a1;

	  /* The new check will start at DR_A1.  Make sure that its access
	     size encompasses the initial DR_A2.  */
	  if (maybe_lt (dr_a1->access_size, diff + dr_a2->access_size))
	    {
	      dr_a1->access_size = upper_bound (dr_a1->access_size,
						diff + dr_a2->access_size);
	      unsigned int new_align = known_alignment (dr_a1->access_size);
	      dr_a1->align = MIN (dr_a1->align, new_align);
	    }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n",
			 DR_REF (dr_a1->dr), DR_REF (dr_b1->dr),
			 DR_REF (dr_a2->dr), DR_REF (dr_b2->dr));
	  alias_pair1->flags |= alias_pair2->flags;
	  last -= 1;
	}
    }
  alias_pairs->truncate (last + 1);

  /* Try to restore the original dr_with_seg_len order within each
     dr_with_seg_len_pair_t.  If we ended up combining swapped and
     unswapped pairs into the same check, we have to invalidate any
     RAW, WAR and WAW information for it.  */
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "merged alias checks:\n");
  FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair)
    {
      unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED);
      unsigned int swapped = (alias_pair->flags & swap_mask);
      if (swapped == DR_ALIAS_SWAPPED)
	std::swap (alias_pair->first, alias_pair->second);
      else if (swapped != DR_ALIAS_UNSWAPPED)
	alias_pair->flags |= DR_ALIAS_ARBITRARY;
      alias_pair->flags &= ~swap_mask;
      if (dump_enabled_p ())
	dump_alias_pair (alias_pair, "  ");
    }
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to use IFN_CHECK_RAW_PTRS and IFN_CHECK_WAR_PTRS
   to optimize cases in which the references form a simple RAW, WAR or
   WAW dependence.  */

static bool
create_ifn_alias_checks (tree *cond_expr,
			 const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) we have a known RAW, WAR or WAW dependence;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_RAW | DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Make sure that both DRs access the same pattern of bytes,
     with a constant length and step.  */
  poly_uint64 seg_len;
  if (!operand_equal_p (dr_a.seg_len, dr_b.seg_len, 0)
      || !poly_int_tree_p (dr_a.seg_len, &seg_len)
      || maybe_ne (dr_a.access_size, dr_b.access_size)
      || !operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0)
      || !tree_fits_uhwi_p (DR_STEP (dr_a.dr)))
    return false;

  unsigned HOST_WIDE_INT bytes = tree_to_uhwi (DR_STEP (dr_a.dr));
  tree addr_a = DR_BASE_ADDRESS (dr_a.dr);
  tree addr_b = DR_BASE_ADDRESS (dr_b.dr);

  /* See whether the target supports what we want to do.  WAW checks are
     equivalent to WAR checks here.  */
  internal_fn ifn = (alias_pair.flags & DR_ALIAS_RAW
		     ? IFN_CHECK_RAW_PTRS
		     : IFN_CHECK_WAR_PTRS);
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 full_length = seg_len + bytes;
  if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					   full_length, align))
    {
      full_length = seg_len + dr_a.access_size;
      if (!internal_check_ptrs_fn_supported_p (ifn, TREE_TYPE (addr_a),
					       full_length, align))
	return false;
    }

  /* Commit to using this form of test.  */
  addr_a = fold_build_pointer_plus (addr_a, DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  addr_b = fold_build_pointer_plus (addr_b, DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  *cond_expr = build_call_expr_internal_loc (UNKNOWN_LOCATION,
					     ifn, boolean_type_node,
					     4, addr_a, addr_b,
					     size_int (full_length),
					     size_int (align));

  if (dump_enabled_p ())
    {
      if (ifn == IFN_CHECK_RAW_PTRS)
	dump_printf (MSG_NOTE, "using an IFN_CHECK_RAW_PTRS test\n");
      else
	dump_printf (MSG_NOTE, "using an IFN_CHECK_WAR_PTRS test\n");
    }
  return true;
}
/* Try to generate a runtime condition that is true if ALIAS_PAIR is
   free of aliases, using a condition based on index values instead
   of a condition based on addresses.  Return true on success,
   storing the condition in *COND_EXPR.

   This can only be done if the two data references in ALIAS_PAIR access
   the same array object and the index is the only difference.  For example,
   if the two data references are DR_A and DR_B:

                       DR_A                           DR_B
      data-ref         arr[i]                         arr[j]
      base_object      arr                            arr
      index            {i_0, +, 1}_loop               {j_0, +, 1}_loop

   The addresses and their index are like:

        |<- ADDR_A    ->|          |<- ADDR_B    ->|
     ------------------------------------------------------->
        |   |   |   |   |          |   |   |   |   |
     ------------------------------------------------------->
        i_0 ...         i_0+4      j_0 ...         j_0+4

   We can create an expression based on the index rather than the address:

     (unsigned) (i_0 - j_0 + 3) <= 6

   i.e. the indices are less than 4 apart.

   Note that the evolution step of the index needs to be considered in the
   comparison.  */

static bool
create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
				     const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS)
      || integer_zerop (DR_STEP (dr_a.dr))
      || integer_zerop (DR_STEP (dr_b.dr))
      || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr))
    return false;

  poly_uint64 seg_len1, seg_len2;
  if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
      || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
    return false;

  if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
    return false;

  if (!operand_equal_p (DR_BASE_OBJECT (dr_a.dr), DR_BASE_OBJECT (dr_b.dr), 0))
    return false;

  if (!operand_equal_p (DR_STEP (dr_a.dr), DR_STEP (dr_b.dr), 0))
    return false;

  gcc_assert (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST);

  bool neg_step = tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0;
  unsigned HOST_WIDE_INT abs_step = tree_to_shwi (DR_STEP (dr_a.dr));
  if (neg_step)
    {
      abs_step = -abs_step;
      seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi ();
      seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi ();
    }

  /* Infer the number of iterations with which the memory segment is accessed
     by DR.  In other words, aliasing is checked if the memory segment accessed
     by DR_A in some iterations intersects with the memory segment accessed by
     DR_B in the same number of iterations.
     Note the segment length is a linear function of the number of iterations,
     with DR_STEP as the coefficient.  */
  poly_uint64 niter_len1, niter_len2;
  if (!can_div_trunc_p (seg_len1 + abs_step - 1, abs_step, &niter_len1)
      || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2))
    return false;

  /* Divide each access size by the byte step, rounding up.  */
  poly_uint64 niter_access1, niter_access2;
  if (!can_div_trunc_p (dr_a.access_size + abs_step - 1,
			abs_step, &niter_access1)
      || !can_div_trunc_p (dr_b.access_size + abs_step - 1,
			   abs_step, &niter_access2))
    return false;

  bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0;

  int found = -1;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++)
    {
      tree access1 = DR_ACCESS_FN (dr_a.dr, i);
      tree access2 = DR_ACCESS_FN (dr_b.dr, i);
      /* The two indices must be the same if they are not SCEVs, or not SCEVs
	 wrt the current loop being vectorized.  */
      if (TREE_CODE (access1) != POLYNOMIAL_CHREC
	  || TREE_CODE (access2) != POLYNOMIAL_CHREC
	  || CHREC_VARIABLE (access1) != (unsigned) loop->num
	  || CHREC_VARIABLE (access2) != (unsigned) loop->num)
	{
	  if (operand_equal_p (access1, access2, 0))
	    continue;

	  return false;
	}
      if (found >= 0)
	return false;
      found = i;
    }

  /* Ought not to happen in practice, since if all accesses are equal then the
     alias should be decidable at compile time.  */
  if (found < 0)
    return false;

  /* The two indices must have the same step.  */
  tree access1 = DR_ACCESS_FN (dr_a.dr, found);
  tree access2 = DR_ACCESS_FN (dr_b.dr, found);
  if (!operand_equal_p (CHREC_RIGHT (access1), CHREC_RIGHT (access2), 0))
    return false;

  tree idx_step = CHREC_RIGHT (access1);
  /* Index must have const step, otherwise DR_STEP won't be constant.  */
  gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
  /* Index must evaluate in the same direction as DR.  */
  gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);

  tree min1 = CHREC_LEFT (access1);
  tree min2 = CHREC_LEFT (access2);
  if (!types_compatible_p (TREE_TYPE (min1), TREE_TYPE (min2)))
    return false;

  /* Ideally, aliasing could be checked against the loop's control IV, but we
     would need to prove a linear mapping between the control IV and the
     reference index.  Although that should be true, we check against the
     (array) index of the data reference instead.  Like segment length, index
     length is a linear function of the number of iterations with index_step
     as the coefficient, i.e., niter_len * idx_step.  */
  offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step),
					      SIGNED);
  if (neg_step)
    abs_idx_step = -abs_idx_step;
  poly_offset_int idx_len1 = abs_idx_step * niter_len1;
  poly_offset_int idx_len2 = abs_idx_step * niter_len2;
  poly_offset_int idx_access1 = abs_idx_step * niter_access1;
  poly_offset_int idx_access2 = abs_idx_step * niter_access2;

  gcc_assert (known_ge (idx_len1, 0)
	      && known_ge (idx_len2, 0)
	      && known_ge (idx_access1, 0)
	      && known_ge (idx_access2, 0));

  /* Each access has the following pattern, with lengths measured
     in units of INDEX:

	  <-- idx_len -->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<-- idx_len -->
	  |
	  min

     where "n" is the number of scalar iterations covered by the segment
     and where each access spans idx_access units.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     When checking for general overlap, we need to test whether
     the range:

       [min1 + low_offset1, min1 + high_offset1 + idx_access1 - 1]

     overlaps:

       [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1]

     where:

	low_offsetN = +ve step ? 0 : -idx_lenN;
	high_offsetN = +ve step ? idx_lenN : 0;

     This is equivalent to testing whether:

       min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1
       && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1

     Converting this into a single test, there is an overlap if:

       0 <= min2 - min1 + bias <= limit

     where  bias = high_offset2 + idx_access2 - 1 - low_offset1
	    limit = (high_offset1 - low_offset1 + idx_access1 - 1)
		  + (high_offset2 - low_offset2 + idx_access2 - 1)
      i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1

     Combining the tests requires limit to be computable in an unsigned
     form of the index type; if it isn't, we fall back to the usual
     pointer-based checks.

     We can do better if DR_B is a write and if DR_A and DR_B are
     well-ordered in both the original and the new code (see the
     comment above the DR_ALIAS_* flags for details).  In this case
     we know that for each i in [0, n-1], the write performed by
     access i of DR_B occurs after access numbers j<=i of DR_A in
     both the original and the new code.  Any write or anti
     dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move min1 along by
     one iteration:

       min1' = min1 + idx_step

     and use the ranges:

       [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1]

     and:

       [min2, min2 + idx_access2 - 1]

     where:

       low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|)
       high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0.  */
  if (waw_or_war_p)
    idx_len1 -= abs_idx_step;

  poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1;
  if (!waw_or_war_p)
    limit += idx_len2;

  tree utype = unsigned_type_for (TREE_TYPE (min1));
  if (!wi::fits_to_tree_p (limit, utype))
    return false;

  poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0;
  poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2;
  poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1;
  /* Equivalent to adding IDX_STEP to MIN1.  */
  if (waw_or_war_p)
    bias -= wi::to_offset (idx_step);

  tree subject = fold_build2 (MINUS_EXPR, utype,
			      fold_convert (utype, min2),
			      fold_convert (utype, min1));
  subject = fold_build2 (PLUS_EXPR, utype, subject,
			 wide_int_to_tree (utype, bias));
  tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject,
				     wide_int_to_tree (utype, limit));
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
  if (dump_enabled_p ())
    {
      if (waw_or_war_p)
	dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n");
      else
	dump_printf (MSG_NOTE, "using an index-based overlap test\n");
    }
  return true;
}
/* A subroutine of create_intersect_range_checks, with a subset of the
   same arguments.  Try to optimize cases in which the second access
   is a write and in which some overlap is valid.  */

static bool
create_waw_or_war_checks (tree *cond_expr,
			  const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;

  /* Check for cases in which:

     (a) DR_B is always a write;
     (b) the accesses are well-ordered in both the original and new code
	 (see the comment above the DR_ALIAS_* flags for details); and
     (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR.  */
  if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
    return false;

  /* Check for equal (but possibly variable) steps.  */
  tree step = DR_STEP (dr_a.dr);
  if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
    return false;

  /* Make sure that we can operate on sizetype without loss of precision.  */
  tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
  if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
    return false;

  /* All addresses involved are known to have a common alignment ALIGN.
     We can therefore subtract ALIGN from an exclusive endpoint to get
     an inclusive endpoint.  In the best (and common) case, ALIGN is the
     same as the access sizes of both DRs, and so subtracting ALIGN
     cancels out the addition of an access size.  */
  unsigned int align = MIN (dr_a.align, dr_b.align);
  poly_uint64 last_chunk_a = dr_a.access_size - align;
  poly_uint64 last_chunk_b = dr_b.access_size - align;

  /* Get a boolean expression that is true when the step is negative.  */
  tree indicator = dr_direction_indicator (dr_a.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));

  /* Get lengths in sizetype.  */
  tree seg_len_a
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
  step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));

  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
	  |
	  base address

     where "n" is the number of scalar iterations covered by the segment.

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     We know that DR_B is a write.  We also know (from checking that
     DR_A and DR_B are well-ordered) that for each i in [0, n-1],
     the write performed by access i of DR_B occurs after access numbers
     j<=i of DR_A in both the original and the new code.  Any write or
     anti dependencies wrt those DR_A accesses are therefore maintained.

     We just need to make sure that each individual write in DR_B does not
     overlap any higher-indexed access in DR_A; such DR_A accesses happen
     after the DR_B access in the original code but happen before it in
     the new code.

     We know the steps for both accesses are equal, so by induction, we
     just need to test whether the first write of DR_B overlaps a later
     access of DR_A.  In other words, we need to move addr_a along by
     one iteration:

       addr_a' = addr_a + step

     and check whether:

       [addr_b, addr_b + last_chunk_b]

     overlaps:

       [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a]

     where [low_offset_a, high_offset_a] spans accesses [1, n-1].  I.e.:

	low_offset_a = +ve step ? 0 : seg_len_a - step
	high_offset_a = +ve step ? seg_len_a - step : 0

     This is equivalent to testing whether:

       addr_a' + low_offset_a <= addr_b + last_chunk_b
       && addr_b <= addr_a' + high_offset_a + last_chunk_a

     Converting this into a single test, there is an overlap if:

       0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit

     where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b

     If DR_A is performed, limit + |step| - last_chunk_b is known to be
     less than the size of the object underlying DR_A.  We also know
     that last_chunk_b <= |step|; this is checked elsewhere if it isn't
     guaranteed at compile time.  There can therefore be no overflow if
     "limit" is calculated in an unsigned type with pointer precision.  */
  tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr),
					 DR_OFFSET (dr_a.dr));
  addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr));

  tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr),
					 DR_OFFSET (dr_b.dr));
  addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr));

  /* Advance ADDR_A by one iteration and adjust the length to compensate.  */
  addr_a = fold_build_pointer_plus (addr_a, step);
  tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype,
					   seg_len_a, step);
  if (!CONSTANT_CLASS_P (seg_len_a_minus_step))
    seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step);

  tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step,
				   seg_len_a_minus_step, size_zero_node);
  if (!CONSTANT_CLASS_P (low_offset_a))
    low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a);

  /* We could use COND_EXPR <neg_step, size_zero_node, seg_len_a_minus_step>,
     but it's usually more efficient to reuse the LOW_OFFSET_A result.  */
  tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step,
				    low_offset_a);

  /* The amount added to addr_b - addr_a'.  */
  tree bias = fold_build2 (MINUS_EXPR, sizetype,
			   size_int (last_chunk_b), low_offset_a);

  tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a);
  limit = fold_build2 (PLUS_EXPR, sizetype, limit,
		       size_int (last_chunk_a + last_chunk_b));

  tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a);
  subject = fold_build2 (PLUS_EXPR, sizetype,
			 fold_convert (sizetype, subject), bias);

  *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
  return true;
}
/* If ALIGN is nonzero, set up *SEG_MIN_OUT and *SEG_MAX_OUT so that for
   every address ADDR accessed by D:

     *SEG_MIN_OUT <= ADDR (== ADDR & -ALIGN) <= *SEG_MAX_OUT

   In this case, every element accessed by D is aligned to at least
   ALIGN bytes.

   If ALIGN is zero then instead set *SEG_MAX_OUT so that:

     *SEG_MIN_OUT <= ADDR < *SEG_MAX_OUT.  */

static void
get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out,
		     tree *seg_max_out, HOST_WIDE_INT align)
{
  /* Each access has the following pattern:

	  <- |seg_len| ->
	  <--- A: -ve step --->
	  +-----+-------+-----+-------+-----+
	  | n-1 | ..... |  0  | ..... | n-1 |
	  +-----+-------+-----+-------+-----+
			<--- B: +ve step --->
			<- |seg_len| ->
	  |
	  base address

     where "n" is the number of scalar iterations covered by the segment.
     (This should be VF for a particular pair if we know that both steps
     are the same, otherwise it will be the full number of scalar loop
     iterations.)

     A is the range of bytes accessed when the step is negative,
     B is the range when the step is positive.

     If the access size is "access_size" bytes, the lowest addressed byte is:

	 base + (step < 0 ? seg_len : 0)   [LB]

     and the highest addressed byte is always below:

	 base + (step < 0 ? 0 : seg_len) + access_size   [UB]

     Thus:

	 LB <= ADDR < UB

     If ALIGN is nonzero, all three values are aligned to at least ALIGN
     bytes, so:

	 LB <= ADDR <= UB - ALIGN

     where "- ALIGN" folds naturally with the "+ access_size" and often
     cancels it out.

     We don't try to simplify LB and UB beyond this (e.g. by using
     MIN and MAX based on whether seg_len rather than the stride is
     negative) because it is possible for the absolute size of the
     segment to overflow the range of a ssize_t.

     Keeping the pointer_plus outside of the cond_expr should allow
     the cond_exprs to be shared with other alias checks.  */
  tree indicator = dr_direction_indicator (d.dr);
  tree neg_step = fold_build2 (LT_EXPR, boolean_type_node,
			       fold_convert (ssizetype, indicator),
			       ssize_int (0));
  tree addr_base = fold_build_pointer_plus (DR_BASE_ADDRESS (d.dr),
					    DR_OFFSET (d.dr));
  addr_base = fold_build_pointer_plus (addr_base, DR_INIT (d.dr));
  tree seg_len
    = fold_convert (sizetype, rewrite_to_non_trapping_overflow (d.seg_len));

  tree min_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				seg_len, size_zero_node);
  tree max_reach = fold_build3 (COND_EXPR, sizetype, neg_step,
				size_zero_node, seg_len);
  max_reach = fold_build2 (PLUS_EXPR, sizetype, max_reach,
			   size_int (d.access_size - align));

  *seg_min_out = fold_build_pointer_plus (addr_base, min_reach);
  *seg_max_out = fold_build_pointer_plus (addr_base, max_reach);
}
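/* Added illustration (hypothetical values, not from the original pass).
   For a reference with base address BASE, a positive step, seg_len = 16,
   access_size = 4 and ALIGN = 4, the code above computes:

     min_reach = 0,  max_reach = 16 + (4 - 4) = 16

   so *SEG_MIN_OUT = BASE and *SEG_MAX_OUT = BASE + 16, giving the
   inclusive range [BASE, BASE + 16] required by the ALIGN != 0
   contract in the comment above.  */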
/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases,
   storing the condition in *COND_EXPR.  The fallback is to generate a
   test that the two accesses do not overlap:

     end_a <= start_b || end_b <= start_a.  */

static void
create_intersect_range_checks (class loop *loop, tree *cond_expr,
			       const dr_with_seg_len_pair_t &alias_pair)
{
  const dr_with_seg_len &dr_a = alias_pair.first;
  const dr_with_seg_len &dr_b = alias_pair.second;
  *cond_expr = NULL_TREE;
  if (create_intersect_range_checks_index (loop, cond_expr, alias_pair))
    return;

  if (create_ifn_alias_checks (cond_expr, alias_pair))
    return;

  if (create_waw_or_war_checks (cond_expr, alias_pair))
    return;

  unsigned HOST_WIDE_INT min_align;
  tree_code cmp_code;
  /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions
     are equivalent.  This is just an optimization heuristic.  */
  if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST
      && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST)
    {
      /* In this case adding access_size to seg_len is likely to give
	 a simple X * step, where X is either the number of scalar
	 iterations or the vectorization factor.  We're better off
	 keeping that, rather than subtracting an alignment from it.

	 In this case the maximum values are exclusive and so there is
	 no alias if the maximum of one segment equals the minimum
	 of another.  */
      min_align = 0;
      cmp_code = LE_EXPR;
    }
  else
    {
      /* Calculate the minimum alignment shared by all four pointers,
	 then arrange for this alignment to be subtracted from the
	 exclusive maximum values to get inclusive maximum values.
	 This "- min_align" is cumulative with a "+ access_size"
	 in the calculation of the maximum values.  In the best
	 (and common) case, the two cancel each other out, leaving
	 us with an inclusive bound based only on seg_len.  In the
	 worst case we're simply adding a smaller number than before.

	 Because the maximum values are inclusive, there is an alias
	 if the maximum value of one segment is equal to the minimum
	 value of the other.  */
      min_align = std::min (dr_a.align, dr_b.align);
      min_align = std::min (min_align, known_alignment (dr_a.access_size));
      min_align = std::min (min_align, known_alignment (dr_b.access_size));
      cmp_code = LT_EXPR;
    }

  tree seg_a_min, seg_a_max, seg_b_min, seg_b_max;
  get_segment_min_max (dr_a, &seg_a_min, &seg_a_max, min_align);
  get_segment_min_max (dr_b, &seg_b_min, &seg_b_max, min_align);

  *cond_expr
    = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
		   fold_build2 (cmp_code, boolean_type_node,
				seg_a_max, seg_b_min),
		   fold_build2 (cmp_code, boolean_type_node,
				seg_b_max, seg_a_min));
  if (dump_enabled_p ())
    dump_printf (MSG_NOTE, "using an address-based overlap test\n");
}
/* Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   pairs passed in ALIAS_PAIRS.  Data references are in LOOP.  The returned
   COND_EXPR is the conditional expression to be used in the if statement
   that controls which version of the loop gets executed at runtime.  */

void
create_runtime_alias_checks (class loop *loop,
			     const vec<dr_with_seg_len_pair_t> *alias_pairs,
			     tree *cond_expr)
{
  tree part_cond_expr;

  fold_defer_overflow_warnings ();
  for (const dr_with_seg_len_pair_t &alias_pair : alias_pairs)
    {
      gcc_assert (alias_pair.flags);
      if (dump_enabled_p ())
	dump_printf (MSG_NOTE,
		     "create runtime check for data references %T and %T\n",
		     DR_REF (alias_pair.first.dr),
		     DR_REF (alias_pair.second.dr));

      /* Create the condition expression for each pair of data references.  */
      create_intersect_range_checks (loop, &part_cond_expr, alias_pair);
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }
  fold_undefer_and_ignore_overflow_warnings ();
}
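/* Added illustration (for exposition only).  For a loop such as:

     void
     f (int *a, int *b, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] = b[i] + 1;
     }

   the conjunction built above becomes the versioning condition,
   conceptually:

     if (a + n <= b || b + n <= a)
       ... vectorized loop ...
     else
       ... scalar loop ...  */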
/* Check if OFFSET1 and OFFSET2 (DR_OFFSETs of some data-refs) are identical
   expressions.  */
static bool
dr_equal_offsets_p1 (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 0),
			     TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = dr_equal_offsets_p1 (TREE_OPERAND (offset1, 1),
			     TREE_OPERAND (offset2, 1));

  return res;
}

/* Check if DRA and DRB have equal offsets.  */
bool
dr_equal_offsets_p (struct data_reference *dra,
		    struct data_reference *drb)
{
  tree offset1, offset2;

  offset1 = DR_OFFSET (dra);
  offset2 = DR_OFFSET (drb);

  return dr_equal_offsets_p1 (offset1, offset2);
}
/* Returns true if FNA == FNB.  */

static bool
affine_function_equal_p (affine_fn fna, affine_fn fnb)
{
  unsigned i, n = fna.length ();

  if (n != fnb.length ())
    return false;

  for (i = 0; i < n; i++)
    if (!operand_equal_p (fna[i], fnb[i], 0))
      return false;

  return true;
}

/* If all the functions in CF are the same, returns one of them,
   otherwise returns NULL.  */

static affine_fn
common_affine_function (conflict_function *cf)
{
  unsigned i;
  affine_fn comm;

  if (!CF_NONTRIVIAL_P (cf))
    return affine_fn ();

  comm = cf->fns[0];

  for (i = 1; i < cf->n; i++)
    if (!affine_function_equal_p (comm, cf->fns[i]))
      return affine_fn ();

  return comm;
}

/* Returns the base of the affine function FN.  */

static tree
affine_function_base (affine_fn fn)
{
  return fn[0];
}

/* Returns true if FN is a constant.  */

static bool
affine_function_constant_p (affine_fn fn)
{
  unsigned i;
  tree coef;

  for (i = 1; fn.iterate (i, &coef); i++)
    if (!integer_zerop (coef))
      return false;

  return true;
}

/* Returns true if FN is the zero constant function.  */

static bool
affine_function_zero_p (affine_fn fn)
{
  return (integer_zerop (affine_function_base (fn))
	  && affine_function_constant_p (fn));
}

/* Returns a signed integer type with the largest precision from TA
   and TB.  */

static tree
signed_type_for_types (tree ta, tree tb)
{
  if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
    return signed_type_for (ta);
  else
    return signed_type_for (tb);
}

/* Applies operation OP on affine functions FNA and FNB, and returns the
   result.  */

static affine_fn
affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
{
  unsigned i, n, m;
  affine_fn ret;
  tree coef;

  if (fnb.length () > fna.length ())
    {
      n = fna.length ();
      m = fnb.length ();
    }
  else
    {
      n = fnb.length ();
      m = fna.length ();
    }

  ret.create (m);
  for (i = 0; i < n; i++)
    {
      tree type = signed_type_for_types (TREE_TYPE (fna[i]),
					 TREE_TYPE (fnb[i]));
      ret.quick_push (fold_build2 (op, type, fna[i], fnb[i]));
    }

  for (; fna.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 coef, integer_zero_node));
  for (; fnb.iterate (i, &coef); i++)
    ret.quick_push (fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
				 integer_zero_node, coef));

  return ret;
}

/* Returns the sum of affine functions FNA and FNB.  */

static affine_fn
affine_fn_plus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (PLUS_EXPR, fna, fnb);
}

/* Returns the difference of affine functions FNA and FNB.  */

static affine_fn
affine_fn_minus (affine_fn fna, affine_fn fnb)
{
  return affine_fn_op (MINUS_EXPR, fna, fnb);
}

/* Frees affine function FN.  */

static void
affine_fn_free (affine_fn fn)
{
  fn.release ();
}
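/* Added illustration (for exposition only).  Affine functions are stored
   as coefficient vectors whose element 0 is the base, so {1, 2}
   represents 1 + 2 * x_1 and {3, 0, 5} represents 3 + 5 * x_2.
   affine_fn_plus pads the shorter vector with zeros, giving:

     {1, 2} + {3, 0, 5} = {4, 2, 5}

   i.e. 4 + 2 * x_1 + 5 * x_2.  */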
/* Determine for each subscript in the data dependence relation DDR
   the distance.  */

static void
compute_subscript_distance (struct data_dependence_relation *ddr)
{
  conflict_function *cf_a, *cf_b;
  affine_fn fn_a, fn_b, diff;

  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      unsigned int i;

      for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
	{
	  struct subscript *subscript;

	  subscript = DDR_SUBSCRIPT (ddr, i);
	  cf_a = SUB_CONFLICTS_IN_A (subscript);
	  cf_b = SUB_CONFLICTS_IN_B (subscript);

	  fn_a = common_affine_function (cf_a);
	  fn_b = common_affine_function (cf_b);
	  if (!fn_a.exists () || !fn_b.exists ())
	    {
	      SUB_DISTANCE (subscript) = chrec_dont_know;
	      return;
	    }
	  diff = affine_fn_minus (fn_a, fn_b);

	  if (affine_function_constant_p (diff))
	    SUB_DISTANCE (subscript) = affine_function_base (diff);
	  else
	    SUB_DISTANCE (subscript) = chrec_dont_know;

	  affine_fn_free (diff);
	}
    }
}
/* Returns the conflict function for "unknown".  */

static conflict_function *
conflict_fn_not_known (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NOT_KNOWN;

  return fn;
}

/* Returns the conflict function for "independent".  */

static conflict_function *
conflict_fn_no_dependence (void)
{
  conflict_function *fn = XCNEW (conflict_function);
  fn->n = NO_DEPENDENCE;

  return fn;
}
/* Returns true if the address of OBJ is invariant in LOOP.  */

static bool
object_address_invariant_in_loop_p (const class loop *loop, const_tree obj)
{
  while (handled_component_p (obj))
    {
      if (TREE_CODE (obj) == ARRAY_REF)
	{
	  for (int i = 1; i < 4; ++i)
	    if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, i),
							loop->num))
	      return false;
	}
      else if (TREE_CODE (obj) == COMPONENT_REF)
	{
	  if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
						      loop->num))
	    return false;
	}
      obj = TREE_OPERAND (obj, 0);
    }

  if (!INDIRECT_REF_P (obj)
      && TREE_CODE (obj) != MEM_REF)
    return true;

  return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
						  loop->num);
}
/* Returns false if we can prove that data references A and B do not alias,
   true otherwise.  If LOOP_NEST is false no cross-iteration aliases are
   considered.  */

bool
dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
		class loop *loop_nest)
{
  tree addr_a = DR_BASE_OBJECT (a);
  tree addr_b = DR_BASE_OBJECT (b);

  /* If we are not processing a loop nest but scalar code we
     do not need to care about possible cross-iteration dependences
     and thus can process the full original reference.  Do so,
     similar to how loop invariant motion applies extra offset-based
     disambiguation.  */
  if (!loop_nest)
    {
      tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
      tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));

      if (DR_BASE_ADDRESS (a)
	  && DR_BASE_ADDRESS (b)
	  && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
	  && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
	  && poly_int_tree_p (tree_size_a)
	  && poly_int_tree_p (tree_size_b)
	  && !ranges_maybe_overlap_p (wi::to_poly_widest (DR_INIT (a)),
				      wi::to_poly_widest (tree_size_a),
				      wi::to_poly_widest (DR_INIT (b)),
				      wi::to_poly_widest (tree_size_b)))
	{
	  gcc_assert (integer_zerop (DR_STEP (a))
		      && integer_zerop (DR_STEP (b)));
	  return false;
	}

      aff_tree off1, off2;
      poly_widest_int size1, size2;
      get_inner_reference_aff (DR_REF (a), &off1, &size1);
      get_inner_reference_aff (DR_REF (b), &off2, &size2);
      aff_combination_scale (&off1, -1);
      aff_combination_add (&off2, &off1);
      if (aff_comb_cannot_overlap_p (&off2, size1, size2))
	return false;
    }

  if ((TREE_CODE (addr_a) == MEM_REF || TREE_CODE (addr_a) == TARGET_MEM_REF)
      && (TREE_CODE (addr_b) == MEM_REF || TREE_CODE (addr_b) == TARGET_MEM_REF)
      /* For cross-iteration dependences the cliques must be valid for the
	 whole loop, not just individual iterations.  */
      && (!loop_nest
	  || MR_DEPENDENCE_CLIQUE (addr_a) == 1
	  || MR_DEPENDENCE_CLIQUE (addr_a) == loop_nest->owned_clique)
      && MR_DEPENDENCE_CLIQUE (addr_a) == MR_DEPENDENCE_CLIQUE (addr_b)
      && MR_DEPENDENCE_BASE (addr_a) != MR_DEPENDENCE_BASE (addr_b))
    return false;

  /* If we had an evolution in a pointer-based MEM_REF BASE_OBJECT we
     do not know the size of the base-object.  So we cannot do any
     offset/overlap based analysis but have to rely on points-to
     information only.  */
  if (TREE_CODE (addr_a) == MEM_REF
      && (DR_UNCONSTRAINED_BASE (a)
	  || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
    }
  else if (TREE_CODE (addr_b) == MEM_REF
	   && (DR_UNCONSTRAINED_BASE (b)
	       || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
    }
  /* If dr_analyze_innermost failed to handle a component we are
     possibly left with a non-base in which case we didn't analyze
     a possible evolution of the base when analyzing a loop.  */
  else if (loop_nest
	   && (handled_component_p (addr_a) || handled_component_p (addr_b)))
    {
      /* For true dependences we can apply TBAA.  */
      if (flag_strict_aliasing
	  && DR_IS_WRITE (a) && DR_IS_READ (b)
	  && !alias_sets_conflict_p (get_alias_set (DR_REF (a)),
				     get_alias_set (DR_REF (b))))
	return false;
      if (TREE_CODE (addr_a) == MEM_REF)
	return ptr_derefs_may_alias_p (TREE_OPERAND (addr_a, 0),
				       build_fold_addr_expr (addr_b));
      else if (TREE_CODE (addr_b) == MEM_REF)
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       TREE_OPERAND (addr_b, 0));
      else
	return ptr_derefs_may_alias_p (build_fold_addr_expr (addr_a),
				       build_fold_addr_expr (addr_b));
    }

  /* Otherwise DR_BASE_OBJECT is an access that covers the whole object
     that is being subsetted in the loop nest.  */
  if (DR_IS_WRITE (a) && DR_IS_WRITE (b))
    return refs_output_dependent_p (addr_a, addr_b);
  else if (DR_IS_READ (a) && DR_IS_WRITE (b))
    return refs_anti_dependent_p (addr_a, addr_b);
  return refs_may_alias_p (addr_a, addr_b);
}
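/* Added illustration (for exposition only).  With strict aliasing
   enabled, a write through "int *p" and a read through "float *q" have
   non-conflicting alias sets, so the TBAA checks above can let
   dr_may_alias_p return false for the pair even when the points-to
   information alone cannot disambiguate the two pointers.  */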
/* REF_A and REF_B both satisfy access_fn_component_p.  Return true
   if it is meaningful to compare their associated access functions
   when checking for dependencies.  */

static bool
access_fn_components_comparable_p (tree ref_a, tree ref_b)
{
  /* Allow pairs of component refs from the following sets:

       { REALPART_EXPR, IMAGPART_EXPR }
       { COMPONENT_REF }
       { ARRAY_REF }.  */
  tree_code code_a = TREE_CODE (ref_a);
  tree_code code_b = TREE_CODE (ref_b);
  if (code_a == IMAGPART_EXPR)
    code_a = REALPART_EXPR;
  if (code_b == IMAGPART_EXPR)
    code_b = REALPART_EXPR;
  if (code_a != code_b)
    return false;

  if (TREE_CODE (ref_a) == COMPONENT_REF)
    /* ??? We cannot simply use the type of operand #0 of the refs here as
       the Fortran compiler smuggles type punning into COMPONENT_REFs.
       Use the DECL_CONTEXT of the FIELD_DECLs instead.  */
    return (DECL_CONTEXT (TREE_OPERAND (ref_a, 1))
	    == DECL_CONTEXT (TREE_OPERAND (ref_b, 1)));

  return types_compatible_p (TREE_TYPE (TREE_OPERAND (ref_a, 0)),
			     TREE_TYPE (TREE_OPERAND (ref_b, 0)));
}
/* Initialize a data dependence relation RES in LOOP_NEST.  USE_ALT_INDICES
   is true when the main indices of A and B were not comparable so we try again
   with alternate indices computed on an indirect reference.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_dependence_relation *res,
				     vec<loop_p> loop_nest,
				     bool use_alt_indices)
{
  struct data_reference *a = DDR_A (res);
  struct data_reference *b = DDR_B (res);
  unsigned int i;

  struct indices *indices_a = &a->indices;
  struct indices *indices_b = &b->indices;
  if (use_alt_indices)
    {
      if (TREE_CODE (DR_REF (a)) != MEM_REF)
	indices_a = &a->alt_indices;
      if (TREE_CODE (DR_REF (b)) != MEM_REF)
	indices_b = &b->alt_indices;
    }
  unsigned int num_dimensions_a = indices_a->access_fns.length ();
  unsigned int num_dimensions_b = indices_b->access_fns.length ();
  if (num_dimensions_a == 0 || num_dimensions_b == 0)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* For unconstrained bases, the root (highest-indexed) subscript
     describes a variation in the base of the original DR_REF rather
     than a component access.  We have no type that accurately describes
     the new DR_BASE_OBJECT (whose TREE_TYPE describes the type *after*
     applying this subscript) so limit the search to the last real
     component access.

     E.g. for:

	void
	f (int a[][8], int b[][8])
	{
	  for (int i = 0; i < 8; ++i)
	    a[i * 2][0] = b[i][0];
	}

     the a and b accesses have a single ARRAY_REF component reference [0]
     but have two subscripts.  */
  if (indices_a->unconstrained_base)
    num_dimensions_a -= 1;
  if (indices_b->unconstrained_base)
    num_dimensions_b -= 1;

  /* These structures describe sequences of component references in
     DR_REF (A) and DR_REF (B).  Each component reference is tied to a
     specific access function.  */
  struct {
    /* The sequence starts at DR_ACCESS_FN (A, START_A) of A and
       DR_ACCESS_FN (B, START_B) of B (inclusive) and extends to higher
       indices.  In C notation, these are the indices of the rightmost
       component references; e.g. for a sequence .b.c.d, the start
       index is for .d.  */
    unsigned int start_a;
    unsigned int start_b;

    /* The sequence contains LENGTH consecutive access functions from
       each DR.  */
    unsigned int length;

    /* The enclosing objects for the A and B sequences respectively,
       i.e. the objects to which DR_ACCESS_FN (A, START_A + LENGTH - 1)
       and DR_ACCESS_FN (B, START_B + LENGTH - 1) are applied.  */
    tree object_a;
    tree object_b;
  } full_seq = {}, struct_seq = {};

  /* Before each iteration of the loop:

     - REF_A is what you get after applying DR_ACCESS_FN (A, INDEX_A) and
     - REF_B is what you get after applying DR_ACCESS_FN (B, INDEX_B).  */
  unsigned int index_a = 0;
  unsigned int index_b = 0;
  tree ref_a = DR_REF (a);
  tree ref_b = DR_REF (b);

  /* Now walk the component references from the final DR_REFs back up to
     the enclosing base objects.  Each component reference corresponds
     to one access function in the DR, with access function 0 being for
     the final DR_REF and the highest-indexed access function being the
     one that is applied to the base of the DR.

     Look for a sequence of component references whose access functions
     are comparable (see access_fn_components_comparable_p).  If more
     than one such sequence exists, pick the one nearest the base
     (which is the leftmost sequence in C notation).  Store this sequence
     in FULL_SEQ.

     For example, if we have:

	struct foo { struct bar s; ... } (*a)[10], (*b)[10];

	A: a[0][i].s.c.d
	B: __real b[0][i].s.e[i].f

     (where d is the same type as the real component of f) then the access
     functions would be:

			 0   1   2   3
	A:              .d  .c  .s [i]

			 0   1   2   3   4   5
	B:  __real      .f [i] .e  .s [i] [0]

     The A0/B2 column isn't comparable, since .d is a COMPONENT_REF
     and [i] is an ARRAY_REF.  However, the A1/B3 column contains two
     COMPONENT_REF accesses for struct bar, so is comparable.  Likewise
     the A2/B4 column contains two COMPONENT_REF accesses for struct foo,
     so is again comparable.  The A3/B5 column contains two ARRAY_REFs that
     index foo[10] arrays, so is again comparable.  The sequence is
     therefore:

	A: [1, 3]  (i.e. [i].s.c)
	B: [3, 5]  (i.e. [i].s.e)

     Also look for sequences of component references whose access
     functions are comparable and whose enclosing objects have the same
     RECORD_TYPE.  Store this sequence in STRUCT_SEQ.  In the above
     example, STRUCT_SEQ would be:

	A: [1, 2]  (i.e. s.c)
	B: [3, 4]  (i.e. s.e)  */
  while (index_a < num_dimensions_a && index_b < num_dimensions_b)
    {
      /* The alternate indices form always has a single dimension
	 with unconstrained base.  */
      gcc_assert (!use_alt_indices);

      /* REF_A and REF_B must be one of the component access types
	 allowed by dr_analyze_indices.  */
      gcc_checking_assert (access_fn_component_p (ref_a));
      gcc_checking_assert (access_fn_component_p (ref_b));

      /* Get the immediately-enclosing objects for REF_A and REF_B,
	 i.e. the references *before* applying DR_ACCESS_FN (A, INDEX_A)
	 and DR_ACCESS_FN (B, INDEX_B).  */
      tree object_a = TREE_OPERAND (ref_a, 0);
      tree object_b = TREE_OPERAND (ref_b, 0);

      tree type_a = TREE_TYPE (object_a);
      tree type_b = TREE_TYPE (object_b);
      if (access_fn_components_comparable_p (ref_a, ref_b))
	{
	  /* This pair of component accesses is comparable for dependence
	     analysis, so we can include DR_ACCESS_FN (A, INDEX_A) and
	     DR_ACCESS_FN (B, INDEX_B) in the sequence.  */
	  if (full_seq.start_a + full_seq.length != index_a
	      || full_seq.start_b + full_seq.length != index_b)
	    {
	      /* The accesses don't extend the current sequence,
		 so start a new one here.  */
	      full_seq.start_a = index_a;
	      full_seq.start_b = index_b;
	      full_seq.length = 0;
	    }

	  /* Add this pair of references to the sequence.  */
	  full_seq.length += 1;
	  full_seq.object_a = object_a;
	  full_seq.object_b = object_b;

	  /* If the enclosing objects are structures (and thus have the
	     same RECORD_TYPE), record the new sequence in STRUCT_SEQ.  */
	  if (TREE_CODE (type_a) == RECORD_TYPE)
	    struct_seq = full_seq;

	  /* Move to the next containing reference for both A and B.  */
	  ref_a = object_a;
	  ref_b = object_b;
	  index_a += 1;
	  index_b += 1;
	  continue;
	}

      /* Try to approach equal type sizes.  */
      if (!COMPLETE_TYPE_P (type_a)
	  || !COMPLETE_TYPE_P (type_b)
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_a))
	  || !tree_fits_uhwi_p (TYPE_SIZE_UNIT (type_b)))
	break;

      unsigned HOST_WIDE_INT size_a = tree_to_uhwi (TYPE_SIZE_UNIT (type_a));
      unsigned HOST_WIDE_INT size_b = tree_to_uhwi (TYPE_SIZE_UNIT (type_b));
      if (size_a <= size_b)
	{
	  index_a += 1;
	  ref_a = object_a;
	}
      if (size_b <= size_a)
	{
	  index_b += 1;
	  ref_b = object_b;
	}
    }

  /* See whether FULL_SEQ ends at the base and whether the two bases
     are equal.  We do not care about TBAA or alignment info so we can
     use OEP_ADDRESS_OF to avoid false negatives.  */
  tree base_a = indices_a->base_object;
  tree base_b = indices_b->base_object;
  bool same_base_p = (full_seq.start_a + full_seq.length == num_dimensions_a
		      && full_seq.start_b + full_seq.length == num_dimensions_b
		      && (indices_a->unconstrained_base
			  == indices_b->unconstrained_base)
		      && operand_equal_p (base_a, base_b, OEP_ADDRESS_OF)
		      && (types_compatible_p (TREE_TYPE (base_a),
					      TREE_TYPE (base_b))
			  || (!base_supports_access_fn_components_p (base_a)
			      && !base_supports_access_fn_components_p (base_b)
			      && operand_equal_p
				   (TYPE_SIZE (TREE_TYPE (base_a)),
				    TYPE_SIZE (TREE_TYPE (base_b)), 0)))
		      && (!loop_nest.exists ()
			  || (object_address_invariant_in_loop_p
			      (loop_nest[0], base_a))));

  /* If the bases are the same, we can include the base variation too.
     E.g. the b accesses in:

       for (int i = 0; i < n; ++i)
         b[i + 4][0] = b[i][0];

     have a definite dependence distance of 4, while for:

       for (int i = 0; i < n; ++i)
         a[i + 4][0] = b[i][0];

     the dependence distance depends on the gap between a and b.

     If the bases are different then we can only rely on the sequence
     rooted at a structure access, since arrays are allowed to overlap
     arbitrarily and change shape arbitrarily.  E.g. we treat this as
     valid code:

       int a[256];
       ...
       ((int (*)[4][3]) &a[1])[i][0] += ((int (*)[4][3]) &a[2])[i][0];

     where two lvalues with the same int[4][3] type overlap, and where
     both lvalues are distinct from the object's declared type.  */
  if (same_base_p)
    {
      if (indices_a->unconstrained_base)
	full_seq.length += 1;
    }
  else
    full_seq = struct_seq;

  /* Punt if we didn't find a suitable sequence.  */
  if (full_seq.length == 0)
    {
      if (use_alt_indices
	  || (TREE_CODE (DR_REF (a)) == MEM_REF
	      && TREE_CODE (DR_REF (b)) == MEM_REF)
	  || may_be_nonaddressable_p (DR_REF (a))
	  || may_be_nonaddressable_p (DR_REF (b)))
	{
	  /* Fully exhausted possibilities.  */
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      /* Try evaluating both DRs as dereferences of pointers.  */
      if (!a->alt_indices.base_object
	  && TREE_CODE (DR_REF (a)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (a)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (a)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (a)), 0));
	  dr_analyze_indices (&a->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (a)));
	}
      if (!b->alt_indices.base_object
	  && TREE_CODE (DR_REF (b)) != MEM_REF)
	{
	  tree alt_ref = build2 (MEM_REF, TREE_TYPE (DR_REF (b)),
				 build1 (ADDR_EXPR, ptr_type_node, DR_REF (b)),
				 build_int_cst
				   (reference_alias_ptr_type (DR_REF (b)), 0));
	  dr_analyze_indices (&b->alt_indices, alt_ref,
			      loop_preheader_edge (loop_nest[0]),
			      loop_containing_stmt (DR_STMT (b)));
	}
      return initialize_data_dependence_relation (res, loop_nest, true);
    }

  if (!same_base_p)
    {
      /* Partial overlap is possible for different bases when strict aliasing
	 is not in effect.  It's also possible if either base involves a union
	 access; e.g. for:

	   struct s1 { int a[2]; };
	   struct s2 { struct s1 b; int c; };
	   struct s3 { int d; struct s1 e; };
	   union u { struct s2 f; struct s3 g; } *p, *q;

	 the s1 at "p->f.b" (base "p->f") partially overlaps the s1 at
	 "p->g.e" (base "p->g") and might partially overlap the s1 at
	 "q->g.e" (base "q->g").  */
      if (!flag_strict_aliasing
	  || ref_contains_union_access_p (full_seq.object_a)
	  || ref_contains_union_access_p (full_seq.object_b))
	{
	  DDR_ARE_DEPENDENT (res) = chrec_dont_know;
	  return res;
	}

      DDR_COULD_BE_INDEPENDENT_P (res) = true;
      if (!loop_nest.exists ()
	  || (object_address_invariant_in_loop_p (loop_nest[0],
						  full_seq.object_a)
	      && object_address_invariant_in_loop_p (loop_nest[0],
						     full_seq.object_b)))
	{
	  DDR_OBJECT_A (res) = full_seq.object_a;
	  DDR_OBJECT_B (res) = full_seq.object_b;
	}
    }

  DDR_AFFINE_P (res) = true;
  DDR_ARE_DEPENDENT (res) = NULL_TREE;
  DDR_SUBSCRIPTS (res).create (full_seq.length);
  DDR_LOOP_NEST (res) = loop_nest;
  DDR_SELF_REFERENCE (res) = false;

  for (i = 0; i < full_seq.length; ++i)
    {
      struct subscript *subscript;

      subscript = XNEW (struct subscript);
      SUB_ACCESS_FN (subscript, 0) = indices_a->access_fns[full_seq.start_a + i];
      SUB_ACCESS_FN (subscript, 1) = indices_b->access_fns[full_seq.start_b + i];
      SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
      SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
      SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
      SUB_DISTANCE (subscript) = chrec_dont_know;
      DDR_SUBSCRIPTS (res).safe_push (subscript);
    }

  return res;
}
/* Initialize a data dependence relation between data accesses A and
   B.  NB_LOOPS is the number of loops surrounding the references: the
   size of the classic distance/direction vectors.  */

struct data_dependence_relation *
initialize_data_dependence_relation (struct data_reference *a,
				     struct data_reference *b,
				     vec<loop_p> loop_nest)
{
  data_dependence_relation *res = XCNEW (struct data_dependence_relation);
  DDR_A (res) = a;
  DDR_B (res) = b;
  DDR_LOOP_NEST (res).create (0);
  DDR_SUBSCRIPTS (res).create (0);
  DDR_DIR_VECTS (res).create (0);
  DDR_DIST_VECTS (res).create (0);

  if (a == NULL || b == NULL)
    {
      DDR_ARE_DEPENDENT (res) = chrec_dont_know;
      return res;
    }

  /* If the data references do not alias, then they are independent.  */
  if (!dr_may_alias_p (a, b, loop_nest.exists () ? loop_nest[0] : NULL))
    {
      DDR_ARE_DEPENDENT (res) = chrec_known;
      return res;
    }

  return initialize_data_dependence_relation (res, loop_nest, false);
}
/* Frees memory used by the conflict function F.  */

static void
free_conflict_function (conflict_function *f)
{
  unsigned i;

  if (CF_NONTRIVIAL_P (f))
    {
      for (i = 0; i < f->n; i++)
	affine_fn_free (f->fns[i]);
    }
  free (f);
}

/* Frees memory used by SUBSCRIPTS.  */

static void
free_subscripts (vec<subscript_p> subscripts)
{
  for (subscript_p s : subscripts)
    {
      free_conflict_function (s->conflicting_iterations_in_a);
      free_conflict_function (s->conflicting_iterations_in_b);
      free (s);
    }
  subscripts.release ();
}

/* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
   description.  */

static inline void
finalize_ddr_dependent (struct data_dependence_relation *ddr,
			tree chrec)
{
  DDR_ARE_DEPENDENT (ddr) = chrec;
  free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_SUBSCRIPTS (ddr).create (0);
}

/* The dependence relation DDR cannot be represented by a distance
   vector.  */

static inline void
non_affine_dependence_relation (struct data_dependence_relation *ddr)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "(Dependence relation cannot be represented by distance vector.) \n");

  DDR_AFFINE_P (ddr) = false;
}
/* This section contains the classic Banerjee tests.  */

/* Returns true iff CHREC_A and CHREC_B are not dependent on any index
   variables, i.e., if the ZIV (Zero Index Variable) test is true.  */

static inline bool
ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  return (evolution_function_is_constant_p (chrec_a)
	  && evolution_function_is_constant_p (chrec_b));
}

/* Returns true iff CHREC_A and CHREC_B are dependent on an index
   variable, i.e., if the SIV (Single Index Variable) test is true.  */

static bool
siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
{
  if ((evolution_function_is_constant_p (chrec_a)
       && evolution_function_is_univariate_p (chrec_b))
      || (evolution_function_is_constant_p (chrec_b)
	  && evolution_function_is_univariate_p (chrec_a)))
    return true;

  if (evolution_function_is_univariate_p (chrec_a)
      && evolution_function_is_univariate_p (chrec_b))
    {
      switch (TREE_CODE (chrec_a))
	{
	case POLYNOMIAL_CHREC:
	  switch (TREE_CODE (chrec_b))
	    {
	    case POLYNOMIAL_CHREC:
	      if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
		return false;
	      /* FALLTHRU */

	    default:
	      return true;
	    }

	default:
	  return true;
	}
    }

  return false;
}
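/* Added illustration (for exposition only).  For the subscript pair
   (5, 10) both access functions are constants, so the ZIV test applies.
   For ({0, +, 1}_1, 10) or ({0, +, 1}_1, {3, +, 1}_1) a single loop
   index is involved, so the SIV test applies.  A pair such as
   ({0, +, 1}_1, {0, +, 1}_2) depends on two different loop variables
   and is classified MIV (Multiple Index Variables).  */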
/* Creates a conflict function with N dimensions.  The affine functions
   in each dimension follow.  */

static conflict_function *
conflict_fn (unsigned n, ...)
{
  unsigned i;
  conflict_function *ret = XCNEW (conflict_function);
  va_list ap;

  gcc_assert (n > 0 && n <= MAX_DIM);
  va_start (ap, n);

  ret->n = n;
  for (i = 0; i < n; i++)
    ret->fns[i] = va_arg (ap, affine_fn);
  va_end (ap);

  return ret;
}

/* Returns constant affine function with value CST.  */

static affine_fn
affine_fn_cst (tree cst)
{
  affine_fn fn;
  fn.create (1);
  fn.quick_push (cst);
  return fn;
}

/* Returns affine function with single variable, CST + COEF * x_DIM.  */

static affine_fn
affine_fn_univar (tree cst, unsigned dim, tree coef)
{
  affine_fn fn;
  fn.create (dim + 1);
  unsigned i;

  gcc_assert (dim > 0);
  fn.quick_push (cst);
  for (i = 1; i < dim; i++)
    fn.quick_push (integer_zero_node);
  fn.quick_push (coef);
  return fn;
}
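/* Added illustration (for exposition only).  affine_fn_cst (c) builds
   the one-element vector {c}, while affine_fn_univar (cst, 2, coef)
   builds {cst, 0, coef}, i.e. the function cst + coef * x_2 with a
   zero coefficient for x_1.  */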
/* Analyze a ZIV (Zero Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_ziv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts)
{
  tree type, difference;
  dependence_stats.num_ziv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_ziv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  switch (TREE_CODE (difference))
    {
    case INTEGER_CST:
      if (integer_zerop (difference))
	{
	  /* The difference is equal to zero: the accessed index
	     overlaps for each iteration in the loop.  */
	  *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
	  *last_conflicts = chrec_dont_know;
	  dependence_stats.num_ziv_dependent++;
	}
      else
	{
	  /* The accesses do not overlap.  */
	  *overlaps_a = conflict_fn_no_dependence ();
	  *overlaps_b = conflict_fn_no_dependence ();
	  *last_conflicts = integer_zero_node;
	  dependence_stats.num_ziv_independent++;
	}
      break;

    default:
      /* We're not sure whether the indexes overlap.  For the moment,
	 conservatively answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "ziv test failed: difference is non-integer.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_ziv_unimplemented++;
      break;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Similar to max_stmt_executions_int, but returns the bound as a tree,
   and only if it fits into the unsigned int type.  If this is not the
   case, or the bound on the number of iterations of LOOP could not be
   derived, returns chrec_dont_know.  */

static tree
max_stmt_executions_tree (class loop *loop)
{
  widest_int nit;

  if (!max_stmt_executions (loop, &nit))
    return chrec_dont_know;

  if (!wi::fits_to_tree_p (nit, unsigned_type_node))
    return chrec_dont_know;

  return wide_int_to_tree (unsigned_type_node, nit);
}
/* Determine whether the CHREC is always positive/negative.  If the expression
   cannot be statically analyzed, return false, otherwise set the answer into
   VALUE.  */

static bool
chrec_is_positive (tree chrec, bool *value)
{
  bool value0, value1, value2;
  tree end_value, nb_iter;

  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      if (!chrec_is_positive (CHREC_LEFT (chrec), &value0)
	  || !chrec_is_positive (CHREC_RIGHT (chrec), &value1))
	return false;

      /* FIXME -- overflows.  */
      if (value0 == value1)
	{
	  *value = value0;
	  return true;
	}

      /* Otherwise the chrec is under the form: "{-197, +, 2}_1",
	 and the proof consists in showing that the sign never
	 changes during the execution of the loop, from 0 to
	 loop->nb_iterations.  */
      if (!evolution_function_is_affine_p (chrec))
	return false;

      nb_iter = number_of_latch_executions (get_chrec_loop (chrec));
      if (chrec_contains_undetermined (nb_iter))
	return false;

#if 0
      /* TODO -- If the test is after the exit, we may decrease the number of
	 iterations by one.  */
      if (after_exit)
	nb_iter = chrec_fold_minus (type, nb_iter, build_int_cst (type, 1));
#endif

      end_value = chrec_apply (CHREC_VARIABLE (chrec), chrec, nb_iter);

      if (!chrec_is_positive (end_value, &value2))
	return false;

      *value = value0;
      return value0 == value2;

    case INTEGER_CST:
      switch (tree_int_cst_sgn (chrec))
	{
	case -1:
	  *value = false;
	  break;
	case 1:
	  *value = true;
	  break;
	default:
	  return false;
	}
      return true;

    default:
      return false;
    }
}
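/* Worked example (hypothetical values, added for exposition).  For the
   chrec {-197, +, 2}_1 mentioned above, CHREC_LEFT is negative and
   CHREC_RIGHT is positive, so the early exit does not apply.  If the
   loop is known to run at most 50 latch iterations, the end value is
   -197 + 2 * 50 = -97, which is still negative; the sign therefore
   never changes and the function returns true with *VALUE == false.  */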
/* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
   constant, and CHREC_B is an affine function.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript_cst_affine (tree chrec_a,
				  tree chrec_b,
				  conflict_function **overlaps_a,
				  conflict_function **overlaps_b,
				  tree *last_conflicts)
{
  bool value0, value1, value2;
  tree type, difference, tmp;

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);

  /* Special case overlap in the first iteration.  */
  if (integer_zerop (difference))
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_one_node;
      return;
    }

  if (!chrec_is_positive (initial_condition (difference), &value0))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "siv test failed: chrec is not positive.\n");

      dependence_stats.num_siv_unimplemented++;
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }
  else
    {
      if (value0 == false)
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value1 == true)
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, 1}
		  */

		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a
			= conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type,
					 fold_build1 (ABS_EXPR, type,
						      difference),
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there are
		     no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}

	      else
		{
		  /* Example:
		     chrec_a = 12
		     chrec_b = {10, +, -1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
      else
	{
	  if (TREE_CODE (chrec_b) != POLYNOMIAL_CHREC
	      || !chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "siv test failed: chrec not positive.\n");

	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	      dependence_stats.num_siv_unimplemented++;
	      return;
	    }
	  else
	    {
	      if (value2 == false)
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {10, +, -1}
		  */
		  if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
		    {
		      HOST_WIDE_INT numiter;
		      class loop *loop = get_chrec_loop (chrec_b);

		      *overlaps_a
			= conflict_fn (1, affine_fn_cst (integer_zero_node));
		      tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
					 CHREC_RIGHT (chrec_b));
		      *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
		      *last_conflicts = integer_one_node;

		      /* Perform weak-zero siv test to see if overlap is
			 outside the loop bounds.  */
		      numiter = max_stmt_executions_int (loop);

		      if (numiter >= 0
			  && compare_tree_int (tmp, numiter) > 0)
			{
			  free_conflict_function (*overlaps_a);
			  free_conflict_function (*overlaps_b);
			  *overlaps_a = conflict_fn_no_dependence ();
			  *overlaps_b = conflict_fn_no_dependence ();
			  *last_conflicts = integer_zero_node;
			  dependence_stats.num_siv_independent++;
			  return;
			}
		      dependence_stats.num_siv_dependent++;
		      return;
		    }

		  /* When the step does not divide the difference, there
		     are no overlaps.  */
		  else
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      dependence_stats.num_siv_independent++;
		      return;
		    }
		}
	      else
		{
		  /* Example:
		     chrec_a = 3
		     chrec_b = {4, +, 1}

		     In this case, chrec_a will not overlap with chrec_b.  */
		  *overlaps_a = conflict_fn_no_dependence ();
		  *overlaps_b = conflict_fn_no_dependence ();
		  *last_conflicts = integer_zero_node;
		  dependence_stats.num_siv_independent++;
		  return;
		}
	    }
	}
    }
}
/* Helper recursive function for initializing the matrix A.  Returns
   the initial value of CHREC.  */

static tree
initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
{
  switch (TREE_CODE (chrec))
    {
    case POLYNOMIAL_CHREC:
      HOST_WIDE_INT chrec_right;
      if (!cst_and_fits_in_hwi (CHREC_RIGHT (chrec)))
	return chrec_dont_know;
      chrec_right = int_cst_value (CHREC_RIGHT (chrec));
      /* We want to be able to negate without overflow.  */
      if (chrec_right == HOST_WIDE_INT_MIN)
	return chrec_dont_know;
      A[index][0] = mult * chrec_right;
      return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);

    case PLUS_EXPR:
    case MULT_EXPR:
    case MINUS_EXPR:
      {
	tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);

	return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
      }

    CASE_CONVERT:
      {
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_convert (chrec_type (chrec), op, NULL);
      }

    case BIT_NOT_EXPR:
      {
	/* Handle ~X as -1 - X.  */
	tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
	return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
			      build_int_cst (TREE_TYPE (chrec), -1), op);
      }

    case INTEGER_CST:
      return chrec;

    default:
      gcc_unreachable ();
      return NULL_TREE;
    }
}
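/* For illustration (ours, not from the original sources): for
   CHREC = {{3, +, 2}_1, +, 5}_2 and MULT = 1, the recursion stores the
   steps in column 0 of A, from the innermost evolution outwards:

   | A[INDEX][0]     = 5   (step in loop_2)
   | A[INDEX + 1][0] = 2   (step in loop_1)

   and returns the initial value 3.  With MULT = -1, as used for the
   second access function in analyze_subscript_affine_affine, the steps
   are negated so that both access functions end up on the same side of
   the resulting Diophantine equation.  */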
#define FLOOR_DIV(x,y) ((x) / (y))

/* Solves the special case of the Diophantine equation:
   | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)

   Computes the descriptions OVERLAPS_A and OVERLAPS_B.  NITER is the
   number of iterations that loops X and Y run.  The overlaps will be
   constructed as evolutions in dimension DIM.  */

static void
compute_overlap_steps_for_affine_univar (HOST_WIDE_INT niter,
					 HOST_WIDE_INT step_a,
					 HOST_WIDE_INT step_b,
					 affine_fn *overlaps_a,
					 affine_fn *overlaps_b,
					 tree *last_conflicts, int dim)
{
  if (((step_a > 0 && step_b > 0)
       || (step_a < 0 && step_b < 0)))
    {
      HOST_WIDE_INT step_overlaps_a, step_overlaps_b;
      HOST_WIDE_INT gcd_steps_a_b, last_conflict, tau2;

      gcd_steps_a_b = gcd (step_a, step_b);
      step_overlaps_a = step_b / gcd_steps_a_b;
      step_overlaps_b = step_a / gcd_steps_a_b;

      if (niter > 0)
	{
	  tau2 = FLOOR_DIV (niter, step_overlaps_a);
	  tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
	  last_conflict = tau2;
	  *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
	}
      else
	*last_conflicts = chrec_dont_know;

      *overlaps_a = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_a));
      *overlaps_b = affine_fn_univar (integer_zero_node, dim,
				      build_int_cst (NULL_TREE,
						     step_overlaps_b));
    }

  else
    {
      *overlaps_a = affine_fn_cst (integer_zero_node);
      *overlaps_b = affine_fn_cst (integer_zero_node);
      *last_conflicts = integer_zero_node;
    }
}
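/* A worked example (ours): for STEP_A = 4 and STEP_B = 6 the gcd is 2,
   so *OVERLAPS_A = {0, +, 3} and *OVERLAPS_B = {0, +, 2} in dimension
   DIM: indeed 4 * (3 * t) = 6 * (2 * t) = 12 * t for every t >= 0.
   With NITER = 100 the last conflict is MIN (100/3, 100/2) = 33.  When
   the steps have opposite signs, only the initial pair of iterations
   can conflict, and the constant zero overlap functions are returned
   by the else branch.  */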
/* Solves the special case of a Diophantine equation where CHREC_A is
   an affine bivariate function, and CHREC_B is an affine univariate
   function.  For example,

   | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z

   has the following overlapping functions:

   | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
   | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
   | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v

   FORNOW: This is a specialized implementation for a case occurring in
   a common benchmark.  Implement the general algorithm.  */

static void
compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
				      conflict_function **overlaps_a,
				      conflict_function **overlaps_b,
				      tree *last_conflicts)
{
  bool xz_p, yz_p, xyz_p;
  HOST_WIDE_INT step_x, step_y, step_z;
  HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
  affine_fn overlaps_a_xz, overlaps_b_xz;
  affine_fn overlaps_a_yz, overlaps_b_yz;
  affine_fn overlaps_a_xyz, overlaps_b_xyz;
  affine_fn ova1, ova2, ovb;
  tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;

  step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
  step_y = int_cst_value (CHREC_RIGHT (chrec_a));
  step_z = int_cst_value (CHREC_RIGHT (chrec_b));

  niter_x = max_stmt_executions_int (get_chrec_loop (CHREC_LEFT (chrec_a)));
  niter_y = max_stmt_executions_int (get_chrec_loop (chrec_a));
  niter_z = max_stmt_executions_int (get_chrec_loop (chrec_b));

  if (niter_x < 0 || niter_y < 0 || niter_z < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      return;
    }

  niter = MIN (niter_x, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
					   &overlaps_a_xz,
					   &overlaps_b_xz,
					   &last_conflicts_xz, 1);
  niter = MIN (niter_y, niter_z);
  compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
					   &overlaps_a_yz,
					   &overlaps_b_yz,
					   &last_conflicts_yz, 2);
  niter = MIN (niter_x, niter_z);
  niter = MIN (niter_y, niter);
  compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
					   &overlaps_a_xyz,
					   &overlaps_b_xyz,
					   &last_conflicts_xyz, 3);

  xz_p = !integer_zerop (last_conflicts_xz);
  yz_p = !integer_zerop (last_conflicts_yz);
  xyz_p = !integer_zerop (last_conflicts_xyz);

  if (xz_p || yz_p || xyz_p)
    {
      ova1 = affine_fn_cst (integer_zero_node);
      ova2 = affine_fn_cst (integer_zero_node);
      ovb = affine_fn_cst (integer_zero_node);
      if (xz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_xz;
	}
      if (yz_p)
	{
	  affine_fn t0 = ova2;
	  affine_fn t2 = ovb;

	  ova2 = affine_fn_plus (ova2, overlaps_a_yz);
	  ovb = affine_fn_plus (ovb, overlaps_b_yz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  *last_conflicts = last_conflicts_yz;
	}
      if (xyz_p)
	{
	  affine_fn t0 = ova1;
	  affine_fn t2 = ova2;
	  affine_fn t4 = ovb;

	  ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
	  ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
	  ovb = affine_fn_plus (ovb, overlaps_b_xyz);
	  affine_fn_free (t0);
	  affine_fn_free (t2);
	  affine_fn_free (t4);
	  *last_conflicts = last_conflicts_xyz;
	}
      *overlaps_a = conflict_fn (2, ova1, ova2);
      *overlaps_b = conflict_fn (1, ovb);
    }
  else
    {
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = integer_zero_node;
    }

  affine_fn_free (overlaps_a_xz);
  affine_fn_free (overlaps_b_xz);
  affine_fn_free (overlaps_a_yz);
  affine_fn_free (overlaps_b_yz);
  affine_fn_free (overlaps_a_xyz);
  affine_fn_free (overlaps_b_xyz);
}
/* Copy the elements of vector VEC1 with length SIZE to VEC2.  */

static void
lambda_vector_copy (lambda_vector vec1, lambda_vector vec2,
		    int size)
{
  memcpy (vec2, vec1, size * sizeof (*vec1));
}

/* Copy the elements of M x N matrix MAT1 to MAT2.  */

static void
lambda_matrix_copy (lambda_matrix mat1, lambda_matrix mat2,
		    int m, int n)
{
  int i;

  for (i = 0; i < m; i++)
    lambda_vector_copy (mat1[i], mat2[i], n);
}

/* Store the N x N identity matrix in MAT.  */

static void
lambda_matrix_id (lambda_matrix mat, int size)
{
  int i, j;

  for (i = 0; i < size; i++)
    for (j = 0; j < size; j++)
      mat[i][j] = (i == j) ? 1 : 0;
}
/* Return the index of the first nonzero element of vector VEC1 between
   START and N.  We must have START <= N.
   Returns N if VEC1 is the zero vector.  */

static int
lambda_vector_first_nz (lambda_vector vec1, int n, int start)
{
  int j = start;
  while (j < n && vec1[j] == 0)
    j++;
  return j;
}

/* Add a multiple of row R1 of matrix MAT with N columns to row R2:
   R2 = R2 + CONST1 * R1.  Returns true on success, false when the
   computation overflows.  */

static bool
lambda_matrix_row_add (lambda_matrix mat, int n, int r1, int r2,
		       lambda_int const1)
{
  int i;

  if (const1 == 0)
    return true;

  for (i = 0; i < n; i++)
    {
      bool ovf;
      lambda_int tem = mul_hwi (mat[r1][i], const1, &ovf);
      if (ovf)
	return false;
      lambda_int tem2 = add_hwi (mat[r2][i], tem, &ovf);
      if (ovf || tem2 == HOST_WIDE_INT_MIN)
	return false;
      mat[r2][i] = tem2;
    }

  return true;
}

/* Multiply vector VEC1 of length SIZE by a constant CONST1,
   and store the result in VEC2.  */

static void
lambda_vector_mult_const (lambda_vector vec1, lambda_vector vec2,
			  int size, lambda_int const1)
{
  int i;

  if (const1 == 0)
    lambda_vector_clear (vec2, size);
  else
    for (i = 0; i < size; i++)
      vec2[i] = const1 * vec1[i];
}

/* Negate vector VEC1 with length SIZE and store it in VEC2.  */

static void
lambda_vector_negate (lambda_vector vec1, lambda_vector vec2,
		      int size)
{
  lambda_vector_mult_const (vec1, vec2, size, -1);
}

/* Negate row R1 of matrix MAT which has N columns.  */

static void
lambda_matrix_row_negate (lambda_matrix mat, int n, int r1)
{
  lambda_vector_negate (mat[r1], mat[r1], n);
}

/* Return true if two vectors are equal.  */

static bool
lambda_vector_equal (lambda_vector vec1, lambda_vector vec2, int size)
{
  int i;

  for (i = 0; i < size; i++)
    if (vec1[i] != vec2[i])
      return false;
  return true;
}
/* Given an M x N integer matrix A, this function determines an M x
   M unimodular matrix U, and an M x N echelon matrix S such that
   "U.A = S".  This decomposition is also known as "right Hermite".

   Ref: Algorithm 2.1 page 33 in "Loop Transformations for
   Restructuring Compilers" Utpal Banerjee.

   Returns true on success, false when one of the row operations
   overflows.  */

static bool
lambda_matrix_right_hermite (lambda_matrix A, int m, int n,
			     lambda_matrix S, lambda_matrix U)
{
  int i, j, i0 = 0;

  lambda_matrix_copy (A, S, m, n);
  lambda_matrix_id (U, m);

  for (j = 0; j < n; j++)
    {
      if (lambda_vector_first_nz (S[j], m, i0) < m)
	{
	  ++i0;
	  for (i = m - 1; i >= i0; i--)
	    {
	      while (S[i][j] != 0)
		{
		  lambda_int factor, a, b;

		  a = S[i-1][j];
		  b = S[i][j];
		  gcc_assert (a != HOST_WIDE_INT_MIN);
		  factor = a / b;

		  if (!lambda_matrix_row_add (S, n, i, i-1, -factor))
		    return false;
		  std::swap (S[i], S[i-1]);

		  if (!lambda_matrix_row_add (U, m, i, i-1, -factor))
		    return false;
		  std::swap (U[i], U[i-1]);
		}
	    }
	}
    }

  return true;
}
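/* A worked example (ours, not from the original sources): for m = 2,
   n = 1 and A = [[4], [6]], the row operations perform the Euclidean
   algorithm on the column and yield

   | S = [[2], [0]],   U = [[-1, 1], [3, -2]]

   and indeed U.A = S: -1*4 + 1*6 = 2 = gcd (4, 6), and 3*4 - 2*6 = 0.
   U is unimodular (det U = 2 - 3 = -1), so the integer solutions of
   the original system correspond exactly to those of the echelon
   system S.  */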
/* Determines the overlapping elements due to accesses CHREC_A and
   CHREC_B, that are affine functions.  This function cannot handle
   symbolic evolution functions, i.e. when initial conditions are
   parameters, because it uses lambda matrices of integers.  */

static void
analyze_subscript_affine_affine (tree chrec_a,
				 tree chrec_b,
				 conflict_function **overlaps_a,
				 conflict_function **overlaps_b,
				 tree *last_conflicts)
{
  unsigned nb_vars_a, nb_vars_b, dim;
  lambda_int gamma, gcd_alpha_beta;
  lambda_matrix A, U, S;
  struct obstack scratch_obstack;

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* The accessed index overlaps for each iteration in the
	 loop.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
      return;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_subscript_affine_affine \n");

  /* For determining the initial intersection, we have to solve a
     Diophantine equation.  This is the most time consuming part.

     For answering to the question: "Is there a dependence?" we have
     to prove that there exists a solution to the Diophantine
     equation, and that the solution is in the iteration domain,
     i.e. the solution is positive or zero, and that the solution
     happens before the upper bound loop.nb_iterations.  Otherwise
     there is no dependence.  This function outputs a description of
     the iterations that hold the intersections.  */

  nb_vars_a = nb_vars_in_chrec (chrec_a);
  nb_vars_b = nb_vars_in_chrec (chrec_b);

  gcc_obstack_init (&scratch_obstack);

  dim = nb_vars_a + nb_vars_b;
  U = lambda_matrix_new (dim, dim, &scratch_obstack);
  A = lambda_matrix_new (dim, 1, &scratch_obstack);
  S = lambda_matrix_new (dim, 1, &scratch_obstack);

  tree init_a = initialize_matrix_A (A, chrec_a, 0, 1);
  tree init_b = initialize_matrix_A (A, chrec_b, nb_vars_a, -1);
  if (init_a == chrec_dont_know
      || init_b == chrec_dont_know)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: "
		 "representation issue.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }
  gamma = int_cst_value (init_b) - int_cst_value (init_a);

  /* Don't do all the hard work of solving the Diophantine equation
     when we already know the solution: for example,
     | {3, +, 1}_1
     | {3, +, 4}_2
     | gamma = 3 - 3 = 0.
     Then the first overlap occurs during the first iterations:
     | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
  */
  if (gamma == 0)
    {
      if (nb_vars_a == 1 && nb_vars_b == 1)
	{
	  HOST_WIDE_INT step_a, step_b;
	  HOST_WIDE_INT niter, niter_a, niter_b;
	  affine_fn ova, ovb;

	  niter_a = max_stmt_executions_int (get_chrec_loop (chrec_a));
	  niter_b = max_stmt_executions_int (get_chrec_loop (chrec_b));
	  niter = MIN (niter_a, niter_b);
	  step_a = int_cst_value (CHREC_RIGHT (chrec_a));
	  step_b = int_cst_value (CHREC_RIGHT (chrec_b));

	  compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
						   &ova, &ovb,
						   last_conflicts, 1);
	  *overlaps_a = conflict_fn (1, ova);
	  *overlaps_b = conflict_fn (1, ovb);
	}

      else if (nb_vars_a == 2 && nb_vars_b == 1)
	compute_overlap_steps_for_affine_1_2
	  (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);

      else if (nb_vars_a == 1 && nb_vars_b == 2)
	compute_overlap_steps_for_affine_1_2
	  (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);

      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: too many variables.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
      goto end_analyze_subs_aa;
    }

  /* U.A = S */
  if (!lambda_matrix_right_hermite (A, dim, 1, S, U))
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  if (S[0][0] < 0)
    {
      S[0][0] *= -1;
      lambda_matrix_row_negate (U, dim, 0);
    }
  gcd_alpha_beta = S[0][0];

  /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
     but that is a quite strange case.  Instead of ICEing, answer
     don't know.  */
  if (gcd_alpha_beta == 0)
    {
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      goto end_analyze_subs_aa;
    }

  /* The classic "gcd-test".  */
  if (!int_divides_p (gcd_alpha_beta, gamma))
    {
      /* The "gcd-test" has determined that there is no integer
	 solution, i.e. there is no dependence.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
    }

  /* Both access functions are univariate.  This includes SIV and MIV cases.  */
  else if (nb_vars_a == 1 && nb_vars_b == 1)
    {
      /* Both functions should have the same evolution sign.  */
      if (((A[0][0] > 0 && -A[1][0] > 0)
	   || (A[0][0] < 0 && -A[1][0] < 0)))
	{
	  /* The solutions are given by:
	     |
	     | [GAMMA/GCD_ALPHA_BETA  t].[u11 u12]  = [x0]
	     |                           [u21 u22]    [y0]

	     For a given integer t.  Using the following variables,

	     | i0 = u11 * gamma / gcd_alpha_beta
	     | j0 = u12 * gamma / gcd_alpha_beta
	     | i1 = u21
	     | j1 = u22

	     the solutions are:

	     | x0 = i0 + i1 * t,
	     | y0 = j0 + j1 * t.  */
	  HOST_WIDE_INT i0, j0, i1, j1;

	  i0 = U[0][0] * gamma / gcd_alpha_beta;
	  j0 = U[0][1] * gamma / gcd_alpha_beta;
	  i1 = U[1][0];
	  j1 = U[1][1];

	  if ((i1 == 0 && i0 < 0)
	      || (j1 == 0 && j0 < 0))
	    {
	      /* There is no solution.
		 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
		 falls in here, but for the moment we don't look at the
		 upper bound of the iteration domain.  */
	      *overlaps_a = conflict_fn_no_dependence ();
	      *overlaps_b = conflict_fn_no_dependence ();
	      *last_conflicts = integer_zero_node;
	      goto end_analyze_subs_aa;
	    }

	  if (i1 > 0 && j1 > 0)
	    {
	      HOST_WIDE_INT niter_a
		= max_stmt_executions_int (get_chrec_loop (chrec_a));
	      HOST_WIDE_INT niter_b
		= max_stmt_executions_int (get_chrec_loop (chrec_b));
	      HOST_WIDE_INT niter = MIN (niter_a, niter_b);

	      /* (X0, Y0) is a solution of the Diophantine equation:
		 "chrec_a (X0) = chrec_b (Y0)".  */
	      HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
					CEIL (-j0, j1));
	      HOST_WIDE_INT x0 = i1 * tau1 + i0;
	      HOST_WIDE_INT y0 = j1 * tau1 + j0;

	      /* (X1, Y1) is the smallest positive solution of the eq
		 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
		 first conflict occurs.  */
	      HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
	      HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
	      HOST_WIDE_INT y1 = y0 - j1 * min_multiple;

	      if (niter >= 0)
		{
		  /* If the overlap occurs outside of the bounds of the
		     loop, there is no dependence.  */
		  if (x1 >= niter_a || y1 >= niter_b)
		    {
		      *overlaps_a = conflict_fn_no_dependence ();
		      *overlaps_b = conflict_fn_no_dependence ();
		      *last_conflicts = integer_zero_node;
		      goto end_analyze_subs_aa;
		    }

		  /* max stmt executions can get quite large, avoid
		     overflows by using wide ints here.  */
		  widest_int tau2
		    = wi::smin (wi::sdiv_floor (wi::sub (niter_a, i0), i1),
				wi::sdiv_floor (wi::sub (niter_b, j0), j1));
		  widest_int last_conflict = wi::sub (tau2, (x1 - i0)/i1);
		  if (wi::min_precision (last_conflict, SIGNED)
		      <= TYPE_PRECISION (integer_type_node))
		    *last_conflicts
		      = build_int_cst (integer_type_node,
				       last_conflict.to_shwi ());
		  else
		    *last_conflicts = chrec_dont_know;
		}
	      else
		*last_conflicts = chrec_dont_know;

	      *overlaps_a
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, x1),
						 1,
						 build_int_cst (NULL_TREE, i1)));
	      *overlaps_b
		= conflict_fn (1,
			       affine_fn_univar (build_int_cst (NULL_TREE, y1),
						 1,
						 build_int_cst (NULL_TREE, j1)));
	    }
	  else
	    {
	      /* FIXME: For the moment, the upper bound of the
		 iteration domain for i and j is not checked.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	      *overlaps_a = conflict_fn_not_known ();
	      *overlaps_b = conflict_fn_not_known ();
	      *last_conflicts = chrec_dont_know;
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
	  *overlaps_a = conflict_fn_not_known ();
	  *overlaps_b = conflict_fn_not_known ();
	  *last_conflicts = chrec_dont_know;
	}
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
    }

end_analyze_subs_aa:
  obstack_free (&scratch_obstack, NULL);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlaps_a = ");
      dump_conflict_function (dump_file, *overlaps_a);
      fprintf (dump_file, ")\n  (overlaps_b = ");
      dump_conflict_function (dump_file, *overlaps_b);
      fprintf (dump_file, "))\n");
    }
}
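/* A worked example of the gcd-test above (ours, not from the original
   sources): for chrec_a = {3, +, 8}_1 and chrec_b = {1, +, 12}_1 we
   get gamma = 1 - 3 = -2 and gcd_alpha_beta = gcd (8, 12) = 4.  Since
   4 does not divide -2, the Diophantine equation 8x - 12y = -2 has no
   integer solution and the accesses are independent.  With
   chrec_b = {1, +, 6}_1 instead, gcd (8, 6) = 2 divides -2, and the
   solutions x0 = i0 + i1 * t, y0 = j0 + j1 * t are enumerated from the
   unimodular matrix U computed by the right Hermite decomposition.  */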
/* Returns true when analyze_subscript_affine_affine can be used for
   determining the dependence relation between chrec_a and chrec_b,
   that contain symbols.  This function modifies chrec_a and chrec_b
   such that the analysis result is the same, and such that they don't
   contain symbols, and then can safely be passed to the analyzer.

   Example: The analysis of the following tuples of evolutions produce
   the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
   vs. {0, +, 1}_1:

   {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
   {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
*/

static bool
can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
{
  tree diff, type, left_a, left_b, right_b;

  if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
      || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
    /* FIXME: For the moment not handled.  Might be refined later.  */
    return false;

  type = chrec_type (*chrec_a);
  left_a = CHREC_LEFT (*chrec_a);
  left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
  diff = chrec_fold_minus (type, left_a, left_b);

  if (!evolution_function_is_constant_p (diff))
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");

  *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
				     diff, CHREC_RIGHT (*chrec_a));
  right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
  *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
				     build_int_cst (type, 0),
				     right_b);
  return true;
}
/* Analyze a SIV (Single Index Variable) subscript.  *OVERLAPS_A and
   *OVERLAPS_B are initialized to the functions that describe the
   relation between the elements accessed twice by CHREC_A and
   CHREC_B.  For k >= 0, the following property is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_siv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       int loop_nest_num)
{
  dependence_stats.num_siv++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_siv_subscript \n");

  if (evolution_function_is_constant_p (chrec_a)
      && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
				      overlaps_a, overlaps_b, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_constant_p (chrec_b))
    analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
				      overlaps_b, overlaps_a, last_conflicts);

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
    {
      if (!chrec_contains_symbols (chrec_a)
	  && !chrec_contains_symbols (chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else if (can_use_analyze_subscript_affine_affine (&chrec_a,
							&chrec_b))
	{
	  analyze_subscript_affine_affine (chrec_a, chrec_b,
					   overlaps_a, overlaps_b,
					   last_conflicts);

	  if (CF_NOT_KNOWN_P (*overlaps_a)
	      || CF_NOT_KNOWN_P (*overlaps_b))
	    dependence_stats.num_siv_unimplemented++;
	  else if (CF_NO_DEPENDENCE_P (*overlaps_a)
		   || CF_NO_DEPENDENCE_P (*overlaps_b))
	    dependence_stats.num_siv_independent++;
	  else
	    dependence_stats.num_siv_dependent++;
	}
      else
	goto siv_subscript_dontknow;
    }

  else
    {
    siv_subscript_dontknow:;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  siv test failed: unimplemented");
      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_siv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Returns false if we can prove that the greatest common divisor of the steps
   of CHREC does not divide CST, true otherwise.  */

static bool
gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
{
  HOST_WIDE_INT cd = 0, val;
  tree step;

  if (!tree_fits_shwi_p (cst))
    return true;
  val = tree_to_shwi (cst);

  while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
    {
      step = CHREC_RIGHT (chrec);
      if (!tree_fits_shwi_p (step))
	return true;
      cd = gcd (cd, tree_to_shwi (step));
      chrec = CHREC_LEFT (chrec);
    }

  return val % cd == 0;
}
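/* For illustration (ours): for CHREC = {{21, +, 2}_1, +, -2}_2 the
   gcd of the steps is gcd (2, -2) = 2.  With CST = 1 the function
   returns false, since 1 % 2 != 0: no combination of the steps can
   make up a difference of 1, which is how analyze_miv_subscript below
   proves independence for the ssa-chrec-33.c example.  With CST = 4
   it would return true, since 4 % 2 == 0.  */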
/* Analyze a MIV (Multiple Index Variable) subscript with respect to
   LOOP_NEST.  *OVERLAPS_A and *OVERLAPS_B are initialized to the
   functions that describe the relation between the elements accessed
   twice by CHREC_A and CHREC_B.  For k >= 0, the following property
   is verified:

   CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)).  */

static void
analyze_miv_subscript (tree chrec_a,
		       tree chrec_b,
		       conflict_function **overlaps_a,
		       conflict_function **overlaps_b,
		       tree *last_conflicts,
		       class loop *loop_nest)
{
  tree type, difference;

  dependence_stats.num_miv++;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "(analyze_miv_subscript \n");

  type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
  chrec_a = chrec_convert (type, chrec_a, NULL);
  chrec_b = chrec_convert (type, chrec_b, NULL);
  difference = chrec_fold_minus (type, chrec_a, chrec_b);

  if (eq_evolutions_p (chrec_a, chrec_b))
    {
      /* Access functions are the same: all the elements are accessed
	 in the same order.  */
      *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = max_stmt_executions_tree (get_chrec_loop (chrec_a));
      dependence_stats.num_miv_dependent++;
    }

  else if (evolution_function_is_constant_p (difference)
	   && evolution_function_is_affine_multivariate_p (chrec_a,
							   loop_nest->num)
	   && !gcd_of_steps_may_divide_p (chrec_a, difference))
    {
      /* testsuite/.../ssa-chrec-33.c
	 {{21, +, 2}_1, +, -2}_2  vs.  {{20, +, 2}_1, +, -2}_2

	 The difference is 1, and all the evolution steps are multiples
	 of 2, consequently there are no overlapping elements.  */
      *overlaps_a = conflict_fn_no_dependence ();
      *overlaps_b = conflict_fn_no_dependence ();
      *last_conflicts = integer_zero_node;
      dependence_stats.num_miv_independent++;
    }

  else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest->num)
	   && !chrec_contains_symbols (chrec_a, loop_nest)
	   && evolution_function_is_affine_in_loop (chrec_b, loop_nest->num)
	   && !chrec_contains_symbols (chrec_b, loop_nest))
    {
      /* testsuite/.../ssa-chrec-35.c
	 {0, +, 1}_2  vs.  {0, +, 1}_3
	 the overlapping elements are respectively located at iterations:
	 {0, +, 1}_x and {0, +, 1}_x,
	 in other words, we have the equality:
	 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)

	 Other examples:
	 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
	 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)

	 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
	 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
      */
      analyze_subscript_affine_affine (chrec_a, chrec_b,
				       overlaps_a, overlaps_b, last_conflicts);

      if (CF_NOT_KNOWN_P (*overlaps_a)
	  || CF_NOT_KNOWN_P (*overlaps_b))
	dependence_stats.num_miv_unimplemented++;
      else if (CF_NO_DEPENDENCE_P (*overlaps_a)
	       || CF_NO_DEPENDENCE_P (*overlaps_b))
	dependence_stats.num_miv_independent++;
      else
	dependence_stats.num_miv_dependent++;
    }

  else
    {
      /* When the analysis is too difficult, answer "don't know".  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");

      *overlaps_a = conflict_fn_not_known ();
      *overlaps_b = conflict_fn_not_known ();
      *last_conflicts = chrec_dont_know;
      dependence_stats.num_miv_unimplemented++;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, ")\n");
}
/* Determines the iterations for which CHREC_A is equal to CHREC_B
   with respect to LOOP_NEST.  OVERLAP_ITERATIONS_A and
   OVERLAP_ITERATIONS_B are initialized with two functions that
   describe the iterations that contain conflicting elements.

   Remark: For an integer k >= 0, the following equality is true:

   CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
*/

static void
analyze_overlapping_iterations (tree chrec_a,
				tree chrec_b,
				conflict_function **overlap_iterations_a,
				conflict_function **overlap_iterations_b,
				tree *last_conflicts, class loop *loop_nest)
{
  unsigned int lnn = loop_nest->num;

  dependence_stats.num_subscript_tests++;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(analyze_overlapping_iterations \n");
      fprintf (dump_file, "  (chrec_a = ");
      print_generic_expr (dump_file, chrec_a);
      fprintf (dump_file, ")\n  (chrec_b = ");
      print_generic_expr (dump_file, chrec_b);
      fprintf (dump_file, ")\n");
    }

  if (chrec_a == NULL_TREE
      || chrec_b == NULL_TREE
      || chrec_contains_undetermined (chrec_a)
      || chrec_contains_undetermined (chrec_b))
    {
      dependence_stats.num_subscript_undetermined++;

      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  /* If they are the same chrec, and are affine, they overlap
     on every iteration.  */
  else if (eq_evolutions_p (chrec_a, chrec_b)
	   && (evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || operand_equal_p (chrec_a, chrec_b, 0)))
    {
      dependence_stats.num_same_subscript_function++;
      *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
      *last_conflicts = chrec_dont_know;
    }

  /* If they aren't the same, and aren't affine, we can't do anything
     yet.  */
  else if ((chrec_contains_symbols (chrec_a)
	    || chrec_contains_symbols (chrec_b))
	   && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
	       || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
    {
      dependence_stats.num_subscript_undetermined++;
      *overlap_iterations_a = conflict_fn_not_known ();
      *overlap_iterations_b = conflict_fn_not_known ();
    }

  else if (ziv_subscript_p (chrec_a, chrec_b))
    analyze_ziv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts);

  else if (siv_subscript_p (chrec_a, chrec_b))
    analyze_siv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, lnn);

  else
    analyze_miv_subscript (chrec_a, chrec_b,
			   overlap_iterations_a, overlap_iterations_b,
			   last_conflicts, loop_nest);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  (overlap_iterations_a = ");
      dump_conflict_function (dump_file, *overlap_iterations_a);
      fprintf (dump_file, ")\n  (overlap_iterations_b = ");
      dump_conflict_function (dump_file, *overlap_iterations_b);
      fprintf (dump_file, "))\n");
    }
}
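/* A note on the dispatch above (ours, following the Allen-Kennedy
   terminology already used by dependence_stats): a subscript pair is
   ZIV (Zero Index Variable) when both access functions are invariant,
   as in A[3] vs. A[5]; SIV when exactly one loop variable appears, as
   in {0, +, 1}_1 vs. 3 or {0, +, 1}_1 vs. {2, +, 1}_1; and MIV when
   several variables are involved, as in {{0, +, 1}_1, +, 2}_2 vs.
   {0, +, 1}_1.  Each class is handled by a dedicated, increasingly
   expensive tester.  */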
/* Helper function for uniquely inserting distance vectors.  */

static void
save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
{
  for (lambda_vector v : DDR_DIST_VECTS (ddr))
    if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIST_VECTS (ddr).safe_push (dist_v);
}

/* Helper function for uniquely inserting direction vectors.  */

static void
save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
{
  for (lambda_vector v : DDR_DIR_VECTS (ddr))
    if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
      return;

  DDR_DIR_VECTS (ddr).safe_push (dir_v);
}

/* Add a distance of 1 on all the loops outer than INDEX.  If we
   haven't yet determined a distance for this outer loop, push a new
   distance vector composed of the previous distance, and a distance
   of 1 for this outer loop.  Example:

   | loop_1
   |  loop_2
   |    A (a, a, a) = ...
   |    ... = A (a, a, a)
   |  endloop_2
   | endloop_1

   Saved vectors are of the form (dist_in_1, dist_in_2).  First, we
   save (0, 1), then we have to save (1, 0).  */

static void
add_outer_distances (struct data_dependence_relation *ddr,
		     lambda_vector dist_v, int index)
{
  /* For each outer loop where init_v is not set, the accesses are
     in dependence of distance 1 in the loop.  */
  while (--index >= 0)
    {
      lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
      save_v[index] = 1;
      save_dist_v (ddr, save_v);
    }
}
/* Return false when we fail to represent the data dependence as a
   distance vector.  A_INDEX is the index of the first reference
   (0 for DDR_A, 1 for DDR_B) and B_INDEX is the index of the
   second reference.  INIT_B is set to true when a component has been
   added to the distance vector DIST_V.  INDEX_CARRY is then set to
   the index in DIST_V that carries the dependence.  */

static bool
build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
			     unsigned int a_index, unsigned int b_index,
			     lambda_vector dist_v, bool *init_b,
			     int *index_carry)
{
  unsigned i;
  lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      tree access_fn_a, access_fn_b;
      struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);

      if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	{
	  non_affine_dependence_relation (ddr);
	  return false;
	}

      access_fn_a = SUB_ACCESS_FN (subscript, a_index);
      access_fn_b = SUB_ACCESS_FN (subscript, b_index);

      if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
	  && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
	{
	  HOST_WIDE_INT dist;
	  int index;
	  int var_a = CHREC_VARIABLE (access_fn_a);
	  int var_b = CHREC_VARIABLE (access_fn_b);

	  if (var_a != var_b
	      || chrec_contains_undetermined (SUB_DISTANCE (subscript)))
	    {
	      non_affine_dependence_relation (ddr);
	      return false;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     may have more access functions than loops.  Skip access
	     functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, var_a), loop))
	    continue;

	  dist = int_cst_value (SUB_DISTANCE (subscript));
	  index = index_in_loop_nest (var_a, DDR_LOOP_NEST (ddr));
	  *index_carry = MIN (index, *index_carry);

	  /* This is the subscript coupling test.  If we have already
	     recorded a distance for this loop (a distance coming from
	     another subscript), it should be the same.  For example,
	     in the following code, there is no dependence:

	     | loop i = 0, N, 1
	     |   T[i+1][i] = ...
	     |   ... = T[i][i]
	     | endloop
	  */
	  if (init_v[index] != 0 && dist_v[index] != dist)
	    {
	      finalize_ddr_dependent (ddr, chrec_known);
	      return false;
	    }

	  dist_v[index] = dist;
	  init_v[index] = 1;
	  *init_b = true;
	}
      else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
	{
	  /* This can be for example an affine vs. constant dependence
	     (T[i] vs. T[3]) that is not an affine dependence and is
	     not representable as a distance vector.  */
	  non_affine_dependence_relation (ddr);
	  return false;
	}
    }

  return true;
}
/* Return true when the DDR contains only invariant access functions
   with respect to loop number LNUM.  */

static bool
invariant_access_functions (const struct data_dependence_relation *ddr,
			    int lnum)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 0), lnum)
	|| !evolution_function_is_invariant_p (SUB_ACCESS_FN (sub, 1), lnum))
      return false;

  return true;
}

/* Helper function for the case where DDR_A and DDR_B are the same
   multivariate access function with a constant step.  */

static void
add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
{
  int x_1, x_2;
  tree c_1 = CHREC_LEFT (c_2);
  tree c_0 = CHREC_LEFT (c_1);
  lambda_vector dist_v;
  HOST_WIDE_INT v1, v2, cd;

  /* Polynomials with more than 2 variables are not handled yet.  When
     the evolution steps are parameters, it is not possible to
     represent the dependence using classical distance vectors.  */
  if (TREE_CODE (c_0) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
      || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
    {
      DDR_AFFINE_P (ddr) = false;
      return;
    }

  x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
  x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));

  /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2).  */
  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  v1 = int_cst_value (CHREC_RIGHT (c_1));
  v2 = int_cst_value (CHREC_RIGHT (c_2));
  cd = gcd (v1, v2);
  v1 /= cd;
  v2 /= cd;

  if (v2 < 0)
    {
      v2 = -v2;
      v1 = -v1;
    }

  dist_v[x_1] = v2;
  dist_v[x_2] = -v1;
  save_dist_v (ddr, dist_v);

  add_outer_distances (ddr, dist_v, x_1);
}
/* Helper function for the case where DDR_A and DDR_B are the same
   access functions.  */

static void
add_other_self_distances (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v;
  unsigned i;
  int index_carry = DDR_NB_LOOPS (ddr);
  subscript *sub;
  class loop *loop = DDR_LOOP_NEST (ddr)[0];

  FOR_EACH_VEC_ELT (DDR_SUBSCRIPTS (ddr), i, sub)
    {
      tree access_fun = SUB_ACCESS_FN (sub, 0);

      if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
	{
	  if (!evolution_function_is_univariate_p (access_fun, loop->num))
	    {
	      if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
		{
		  DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
		  return;
		}

	      access_fun = SUB_ACCESS_FN (DDR_SUBSCRIPT (ddr, 0), 0);

	      if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
		add_multivariate_self_dist (ddr, access_fun);
	      else
		/* The evolution step is not constant: it varies in
		   the outer loop, so this cannot be represented by a
		   distance vector.  For example in pr34635.c the
		   evolution is {0, +, {0, +, 4}_1}_2.  */
		DDR_AFFINE_P (ddr) = false;

	      return;
	    }

	  /* When data references are collected in a loop while data
	     dependences are analyzed in loop nest nested in the loop, we
	     may have more access functions than loops.  Skip access
	     functions of loops not in the loop nest.

	     See PR89725 for more information.  */
	  if (flow_loop_nested_p (get_loop (cfun, CHREC_VARIABLE (access_fun)),
				  loop))
	    continue;

	  index_carry = MIN (index_carry,
			     index_in_loop_nest (CHREC_VARIABLE (access_fun),
						 DDR_LOOP_NEST (ddr)));
	}
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  add_outer_distances (ddr, dist_v, index_carry);
}

static void
insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
{
  lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

  dist_v[0] = 1;
  save_dist_v (ddr, dist_v);
}
/* Adds a unit distance vector to DDR when there is a 0 overlap.  This
   is the case for example when access functions are the same and
   equal to a constant, as in:

   | loop_1
   |   A[3] = ...
   |   ... = A[3]
   | endloop_1

   in which case the distance vectors are (0) and (1).  */

static void
add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
{
  unsigned i, j;

  for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
    {
      subscript_p sub = DDR_SUBSCRIPT (ddr, i);
      conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
      conflict_function *cb = SUB_CONFLICTS_IN_B (sub);

      for (j = 0; j < ca->n; j++)
	if (affine_function_zero_p (ca->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }

      for (j = 0; j < cb->n; j++)
	if (affine_function_zero_p (cb->fns[j]))
	  {
	    insert_innermost_unit_dist_vector (ddr);
	    return;
	  }
    }
}

/* Return true when the DDR contains two data references that have the
   same access functions.  */

static inline bool
same_access_functions (const struct data_dependence_relation *ddr)
{
  for (subscript *sub : DDR_SUBSCRIPTS (ddr))
    if (!eq_evolutions_p (SUB_ACCESS_FN (sub, 0),
			  SUB_ACCESS_FN (sub, 1)))
      return false;

  return true;
}
/* Compute the classic per loop distance vector.  DDR is the data
   dependence relation to build a vector from.  Return false when we
   fail to represent the data dependence as a distance vector.  */

static bool
build_classic_dist_vector (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  bool init_b = false;
  int index_carry = DDR_NB_LOOPS (ddr);
  lambda_vector dist_v;

  if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
    return false;

  if (same_access_functions (ddr))
    {
      /* Save the 0 vector.  */
      dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
      save_dist_v (ddr, dist_v);

      if (invariant_access_functions (ddr, loop_nest->num))
	add_distance_for_zero_overlaps (ddr);

      if (DDR_NB_LOOPS (ddr) > 1)
	add_other_self_distances (ddr);

      return true;
    }

  dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
  if (!build_classic_dist_vector_1 (ddr, 0, 1, dist_v, &init_b, &index_carry))
    return false;

  /* Save the distance vector if we initialized one.  */
  if (init_b)
    {
      /* Verify a basic constraint: classic distance vectors should
	 always be lexicographically positive.

	 Data references are collected in the order of execution of
	 the program, thus for the following loop

	 | for (i = 1; i < 100; i++)
	 |   for (j = 1; j < 100; j++)
	 |     {
	 |       t = T[j+1][i-1];  // A
	 |       T[j][i] = t + 2;  // B
	 |     }

	 references are collected following the direction of the wind:
	 A then B.  The data dependence tests are performed also
	 following this order, such that we're looking at the distance
	 separating the elements accessed by A from the elements later
	 accessed by B.  But in this example, the distance returned by
	 test_dep (A, B) is lexicographically negative (-1, 1), that
	 means that the access A occurs later than B with respect to
	 the outer loop, ie. we're actually looking upwind.  In this
	 case we solve test_dep (B, A) looking downwind to the
	 lexicographically positive solution, that returns the
	 distance vector (1, -1).  */
      if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
	    return false;
	  compute_subscript_distance (ddr);
	  if (!build_classic_dist_vector_1 (ddr, 1, 0, save_v, &init_b,
					    &index_carry))
	    return false;
	  save_dist_v (ddr, save_v);
	  DDR_REVERSED_P (ddr) = true;

	  /* In this case there is a dependence forward for all the
	     outer loops:

	     | for (k = 1; k < 100; k++)
	     |  for (i = 1; i < 100; i++)
	     |   for (j = 1; j < 100; j++)
	     |     {
	     |       t = T[j+1][i-1];  // A
	     |       T[j][i] = t + 2;  // B
	     |     }
	  */
	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      add_outer_distances (ddr, save_v, index_carry);
	      add_outer_distances (ddr, dist_v, index_carry);
	    }
	}
      else
	{
	  lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
	  lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));

	  if (DDR_NB_LOOPS (ddr) > 1)
	    {
	      lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

	      if (!subscript_dependence_tester_1 (ddr, 1, 0, loop_nest))
		return false;
	      compute_subscript_distance (ddr);
	      if (!build_classic_dist_vector_1 (ddr, 1, 0, opposite_v, &init_b,
						&index_carry))
		return false;

	      save_dist_v (ddr, save_v);
	      add_outer_distances (ddr, dist_v, index_carry);
	      add_outer_distances (ddr, opposite_v, index_carry);
	    }
	  else
	    save_dist_v (ddr, save_v);
	}
    }
  else
    {
      /* There is a distance of 1 on all the outer loops: Example:
	 there is a dependence of distance 1 on loop_1 for the array A.

	 | loop_1
	 |  A[5] = ...
	 | endloop
      */
      add_outer_distances (ddr, dist_v,
			   lambda_vector_first_nz (dist_v,
						   DDR_NB_LOOPS (ddr), 0));
    }

  return true;
}
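/* A small example of the result (ours, not from the original sources):
   for

   | for (i = 0; i < 100; i++)
   |   A[i + 2] = A[i];

   the two access functions are {2, +, 1}_1 and {0, +, 1}_1, the
   subscript distance is 2, and the only distance vector saved is (2):
   the element written at iteration i is read again two iterations
   later.  A client such as the vectorizer can deduce from it that at
   most two consecutive iterations may safely execute in parallel.  */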
/* Return the direction for a given distance.
   FIXME: Computing dir this way is suboptimal, since dir can catch
   cases that dist is unable to represent.  */

static inline enum data_dependence_direction
dir_from_dist (int dist)
{
  if (dist > 0)
    return dir_positive;
  else if (dist < 0)
    return dir_negative;
  else
    return dir_equal;
}

/* Compute the classic per loop direction vector.  DDR is the data
   dependence relation to build a vector from.  */

static void
build_classic_dir_vector (struct data_dependence_relation *ddr)
{
  unsigned i, j;
  lambda_vector dist_v;

  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));

      for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
	dir_v[j] = dir_from_dist (dist_v[j]);

      save_dir_v (ddr, dir_v);
    }
}
/* Helper function.  Returns true when there is a dependence between the
   data references.  A_INDEX is the index of the first reference (0 for
   DDR_A, 1 for DDR_B) and B_INDEX is the index of the second reference.  */

static bool
subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
			       unsigned int a_index, unsigned int b_index,
			       class loop *loop_nest)
{
  unsigned int i;
  tree last_conflicts;
  struct subscript *subscript;
  tree res = NULL_TREE;

  for (i = 0; DDR_SUBSCRIPTS (ddr).iterate (i, &subscript); i++)
    {
      conflict_function *overlaps_a, *overlaps_b;

      analyze_overlapping_iterations (SUB_ACCESS_FN (subscript, a_index),
				      SUB_ACCESS_FN (subscript, b_index),
				      &overlaps_a, &overlaps_b,
				      &last_conflicts, loop_nest);

      if (SUB_CONFLICTS_IN_A (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
      if (SUB_CONFLICTS_IN_B (subscript))
	free_conflict_function (SUB_CONFLICTS_IN_B (subscript));

      SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
      SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
      SUB_LAST_CONFLICT (subscript) = last_conflicts;

      /* If there is any undetermined conflict function we have to
	 give a conservative answer in case we cannot prove that
	 no dependence exists when analyzing another subscript.  */
      if (CF_NOT_KNOWN_P (overlaps_a)
	  || CF_NOT_KNOWN_P (overlaps_b))
	{
	  res = chrec_dont_know;
	  continue;
	}

      /* When there is a subscript with no dependence we can stop.  */
      else if (CF_NO_DEPENDENCE_P (overlaps_a)
	       || CF_NO_DEPENDENCE_P (overlaps_b))
	{
	  res = chrec_known;
	  break;
	}
    }

  if (res == NULL_TREE)
    return true;

  if (res == chrec_known)
    dependence_stats.num_dependence_independent++;
  else
    dependence_stats.num_dependence_undetermined++;
  finalize_ddr_dependent (ddr, res);
  return false;
}
/* Computes the conflicting iterations in LOOP_NEST, and initializes DDR.  */

static void
subscript_dependence_tester (struct data_dependence_relation *ddr,
			     class loop *loop_nest)
{
  if (subscript_dependence_tester_1 (ddr, 0, 1, loop_nest))
    dependence_stats.num_dependence_dependent++;

  compute_subscript_distance (ddr);
  if (build_classic_dist_vector (ddr, loop_nest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  unsigned i;

	  fprintf (dump_file, "(build_classic_dist_vector\n");
	  for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
	    {
	      fprintf (dump_file, "  dist_vector = (");
	      print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
				   DDR_NB_LOOPS (ddr));
	      fprintf (dump_file, "  )\n");
	    }
	  fprintf (dump_file, ")\n");
	}
      build_classic_dir_vector (ddr);
    }
}
/* Returns true when all the access functions of A are affine or
   constant with respect to LOOP_NEST.  */

static bool
access_functions_are_affine_or_constant_p (const struct data_reference *a,
					   const class loop *loop_nest)
{
  vec<tree> fns = DR_ACCESS_FNS (a);
  for (tree t : fns)
    if (!evolution_function_is_invariant_p (t, loop_nest->num)
	&& !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
      return false;

  return true;
}
/* This computes the affine dependence relation between A and B with
   respect to LOOP_NEST.  CHREC_KNOWN is used for representing the
   independence between two accesses, while CHREC_DONT_KNOW is used
   for representing the unknown relation.

   Note that it is possible to stop the computation of the dependence
   relation the first time we detect a CHREC_KNOWN element for a given
   subscript.  */

void
compute_affine_dependence (struct data_dependence_relation *ddr,
			   class loop *loop_nest)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "(compute_affine_dependence\n");
      fprintf (dump_file, "  ref_a: ");
      print_generic_expr (dump_file, DR_REF (dra));
      fprintf (dump_file, ", stmt_a: ");
      print_gimple_stmt (dump_file, DR_STMT (dra), 0, TDF_SLIM);
      fprintf (dump_file, "  ref_b: ");
      print_generic_expr (dump_file, DR_REF (drb));
      fprintf (dump_file, ", stmt_b: ");
      print_gimple_stmt (dump_file, DR_STMT (drb), 0, TDF_SLIM);
    }

  /* Analyze only when the dependence relation is not yet known.  */
  if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
    {
      dependence_stats.num_dependence_tests++;

      if (access_functions_are_affine_or_constant_p (dra, loop_nest)
	  && access_functions_are_affine_or_constant_p (drb, loop_nest))
	subscript_dependence_tester (ddr, loop_nest);

      /* As a last case, if the dependence cannot be determined, or if
	 the dependence is considered too difficult to determine, answer
	 "don't know".  */
      else
	{
	  dependence_stats.num_dependence_undetermined++;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Data ref a:\n");
	      dump_data_reference (dump_file, dra);
	      fprintf (dump_file, "Data ref b:\n");
	      dump_data_reference (dump_file, drb);
	      fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
	    }
	  finalize_ddr_dependent (ddr, chrec_dont_know);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
	fprintf (dump_file, ") -> no dependence\n");
      else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	fprintf (dump_file, ") -> dependence analysis failed\n");
      else
	fprintf (dump_file, ")\n");
    }
}
/* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
   the data references in DATAREFS, in the LOOP_NEST.  When
   COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
   relations.  Return true when successful, i.e. data references number
   is small enough to be handled.  */

bool
compute_all_dependences (const vec<data_reference_p> &datarefs,
			 vec<ddr_p> *dependence_relations,
			 const vec<loop_p> &loop_nest,
			 bool compute_self_and_rr)
{
  struct data_dependence_relation *ddr;
  struct data_reference *a, *b;
  unsigned int i, j;

  if ((int) datarefs.length ()
      > param_loop_max_datarefs_for_datadeps)
    {
      struct data_dependence_relation *ddr;

      /* Insert a single relation into dependence_relations:
	 chrec_dont_know.  */
      ddr = initialize_data_dependence_relation (NULL, NULL, loop_nest);
      dependence_relations->safe_push (ddr);
      return false;
    }

  FOR_EACH_VEC_ELT (datarefs, i, a)
    for (j = i + 1; datarefs.iterate (j, &b); j++)
      if (DR_IS_WRITE (a) || DR_IS_WRITE (b) || compute_self_and_rr)
	{
	  ddr = initialize_data_dependence_relation (a, b, loop_nest);
	  dependence_relations->safe_push (ddr);
	  if (loop_nest.exists ())
	    compute_affine_dependence (ddr, loop_nest[0]);
	}

  if (compute_self_and_rr)
    FOR_EACH_VEC_ELT (datarefs, i, a)
      {
	ddr = initialize_data_dependence_relation (a, a, loop_nest);
	dependence_relations->safe_push (ddr);
	if (loop_nest.exists ())
	  compute_affine_dependence (ddr, loop_nest[0]);
      }

  return true;
}
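/* For illustration only (ours, not from the original sources): a
   sketch of how a client pass typically drives the entry points above;
   LOOP and LOOP_NEST are assumed to have been set up by the caller,
   and cleanup of the vectors is elided.

   | auto_vec<data_reference_p> datarefs;
   | auto_vec<ddr_p> ddrs;
   | if (find_data_references_in_loop (loop, &datarefs) != chrec_dont_know
   |     && compute_all_dependences (datarefs, &ddrs, loop_nest, false))
   |   for (ddr_p ddr : ddrs)
   |     if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
   |       ; // be conservative: treat the pair as dependent
*/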
/* Describes a location of a memory reference.  */

struct data_ref_loc
{
  /* The memory reference.  */
  tree ref;

  /* True if the memory reference is read.  */
  bool is_read;

  /* True if the data reference is conditional within the containing
     statement, i.e. if it might not occur even when the statement
     is executed and runs to completion.  */
  bool is_conditional_in_stmt;
};
/* Stores the locations of memory references in STMT to REFERENCES.  Returns
   true if STMT clobbers memory, false otherwise.  */

bool
get_references_in_stmt (gimple *stmt, vec<data_ref_loc, va_heap> *references)
{
  bool clobbers_memory = false;
  data_ref_loc ref;
  tree op0, op1;
  enum gimple_code stmt_code = gimple_code (stmt);

  /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
     As we cannot model data-references to not spelled out
     accesses give up if they may occur.  */
  if (stmt_code == GIMPLE_CALL
      && !(gimple_call_flags (stmt) & ECF_CONST))
    {
      /* Allow IFN_GOMP_SIMD_LANE in their own loops.  */
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_SIMD_LANE:
	    {
	      class loop *loop = gimple_bb (stmt)->loop_father;
	      tree uid = gimple_call_arg (stmt, 0);
	      gcc_assert (TREE_CODE (uid) == SSA_NAME);
	      if (loop == NULL
		  || loop->simduid != SSA_NAME_VAR (uid))
		clobbers_memory = true;
	      break;
	    }
	  case IFN_MASK_LOAD:
	  case IFN_MASK_STORE:
	    break;
	  case IFN_MASK_CALL:
	    {
	      tree orig_fndecl
		= gimple_call_addr_fndecl (gimple_call_arg (stmt, 0));
	      if (!orig_fndecl
		  || (flags_from_decl_or_type (orig_fndecl) & ECF_CONST) == 0)
		clobbers_memory = true;
	    }
	    break;
	  default:
	    clobbers_memory = true;
	    break;
	  }
      else if (gimple_call_builtin_p (stmt, BUILT_IN_PREFETCH))
	clobbers_memory = false;
      else
	clobbers_memory = true;
    }
  else if (stmt_code == GIMPLE_ASM
	   && (gimple_asm_volatile_p (as_a <gasm *> (stmt))
	       || gimple_vuse (stmt)))
    clobbers_memory = true;

  if (!gimple_vuse (stmt))
    return clobbers_memory;

  if (stmt_code == GIMPLE_ASSIGN)
    {
      tree base;
      op0 = gimple_assign_lhs (stmt);
      op1 = gimple_assign_rhs1 (stmt);

      if (DECL_P (op1)
	  || (REFERENCE_CLASS_P (op1)
	      && (base = get_base_address (op1))
	      && TREE_CODE (base) != SSA_NAME
	      && !is_gimple_min_invariant (base)))
	{
	  ref.ref = op1;
	  ref.is_read = true;
	  ref.is_conditional_in_stmt = false;
	  references->safe_push (ref);
	}
    }
  else if (stmt_code == GIMPLE_CALL)
    {
      unsigned i, n;
      tree ptr, type;
      unsigned int align;

      ref.is_read = false;
      if (gimple_call_internal_p (stmt))
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_MASK_LOAD:
	    if (gimple_call_lhs (stmt) == NULL_TREE)
	      break;
	    ref.is_read = true;
	    /* FALLTHRU */
	  case IFN_MASK_STORE:
	    ptr = build_int_cst (TREE_TYPE (gimple_call_arg (stmt, 1)), 0);
	    align = tree_to_shwi (gimple_call_arg (stmt, 1));
	    if (ref.is_read)
	      type = TREE_TYPE (gimple_call_lhs (stmt));
	    else
	      type = TREE_TYPE (gimple_call_arg (stmt, 3));
	    if (TYPE_ALIGN (type) != align)
	      type = build_aligned_type (type, align);
	    ref.is_conditional_in_stmt = true;
	    ref.ref = fold_build2 (MEM_REF, type, gimple_call_arg (stmt, 0),
				   ptr);
	    references->safe_push (ref);
	    return false;
	  default:
	    break;
	  }

      op0 = gimple_call_lhs (stmt);
      n = gimple_call_num_args (stmt);
      for (i = 0; i < n; i++)
	{
	  op1 = gimple_call_arg (stmt, i);

	  if (DECL_P (op1)
	      || (REFERENCE_CLASS_P (op1) && get_base_address (op1)))
	    {
	      ref.ref = op1;
	      ref.is_read = true;
	      ref.is_conditional_in_stmt = false;
	      references->safe_push (ref);
	    }
	}
    }
  else
    return clobbers_memory;

  if (op0
      && (DECL_P (op0)
	  || (REFERENCE_CLASS_P (op0) && get_base_address (op0))))
    {
      ref.ref = op0;
      ref.is_read = false;
      ref.is_conditional_in_stmt = false;
      references->safe_push (ref);
    }
  return clobbers_memory;
}
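/* For illustration (ours): for the gimple assignment

   | A[i_1] = *p_2;

   the function records *p_2 as an unconditional read (OP1) and A[i_1]
   as an unconditional write (OP0), and returns false since a plain
   assignment does not clobber memory.  A call to a non-const,
   non-builtin function instead returns true, after recording only the
   references visible in its LHS and arguments.  */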
/* Returns true if the loop-nest has any data reference.  */

bool
loop_nest_has_data_refs (loop_p loop)
{
  basic_block *bbs = get_loop_body (loop);
  auto_vec<data_ref_loc, 3> references;

  for (unsigned i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator bsi;

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  get_references_in_stmt (stmt, &references);
	  if (references.length ())
	    {
	      free (bbs);
	      return true;
	    }
	}
    }
  free (bbs);
  return false;
}
/* Stores the data references in STMT to DATAREFS.  If there is an unanalyzable
   reference, returns false, otherwise returns true.  NEST is the outermost
   loop of the loop nest in which the references should be analyzed.  */

opt_result
find_data_references_in_stmt (class loop *nest, gimple *stmt,
			      vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return opt_result::failure_at (stmt, "statement clobbers memory: %G",
				   stmt);

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
			    loop_containing_stmt (stmt), ref.ref,
			    stmt, ref.is_read, ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return opt_result::success ();
}
/* Stores the data references in STMT to DATAREFS.  If there is an
   unanalyzable reference, returns false, otherwise returns true.
   NEST is the outermost loop of the loop nest in which the references
   should be instantiated, LOOP is the loop in which the references
   should be analyzed.  */

bool
graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
				       vec<data_reference_p> *datarefs)
{
  auto_vec<data_ref_loc, 2> references;
  bool ret = true;
  data_reference_p dr;

  if (get_references_in_stmt (stmt, &references))
    return false;

  for (const data_ref_loc &ref : references)
    {
      dr = create_data_ref (nest, loop, ref.ref, stmt, ref.is_read,
			    ref.is_conditional_in_stmt);
      gcc_assert (dr != NULL);
      datarefs->safe_push (dr);
    }

  return ret;
}
/* Search the data references in BB, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.  */

tree
find_data_references_in_bb (class loop *loop, basic_block bb,
			    vec<data_reference_p> *datarefs)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);

      if (!find_data_references_in_stmt (loop, stmt, datarefs))
	{
	  struct data_reference *res;
	  res = XCNEW (struct data_reference);
	  datarefs->safe_push (res);

	  return chrec_dont_know;
	}
    }

  return NULL_TREE;
}
/* Search the data references in LOOP, and record the information into
   DATAREFS.  Returns chrec_dont_know when failing to analyze a
   difficult case, returns NULL_TREE otherwise.

   TODO: This function should be made smarter so that it can handle address
   arithmetic as if they were array accesses, etc.  */

tree
find_data_references_in_loop (class loop *loop,
			      vec<data_reference_p> *datarefs)
{
  basic_block bb, *bbs;
  unsigned int i;

  bbs = get_loop_body_in_dom_order (loop);

  for (i = 0; i < loop->num_nodes; i++)
    {
      bb = bbs[i];

      if (find_data_references_in_bb (loop, bb, datarefs) == chrec_dont_know)
	{
	  free (bbs);
	  return chrec_dont_know;
	}
    }
  free (bbs);

  return NULL_TREE;
}
/* Return the alignment in bytes that DRB is guaranteed to have at all
   times.  */

unsigned int
dr_alignment (innermost_loop_behavior *drb)
{
  /* Get the alignment of BASE_ADDRESS + INIT.  */
  unsigned int alignment = drb->base_alignment;
  unsigned int misalignment = (drb->base_misalignment
			       + TREE_INT_CST_LOW (drb->init));
  if (misalignment != 0)
    alignment = MIN (alignment, misalignment & -misalignment);

  /* Cap it to the alignment of OFFSET.  */
  if (!integer_zerop (drb->offset))
    alignment = MIN (alignment, drb->offset_alignment);

  /* Cap it to the alignment of STEP.  */
  if (!integer_zerop (drb->step))
    alignment = MIN (alignment, drb->step_alignment);

  return alignment;
}
/* If BASE is a pointer-typed SSA name, try to find the object that it
   is based on.  Return this object X on success and store the alignment
   in bytes of BASE - &X in *ALIGNMENT_OUT.  */

static tree
get_base_for_alignment_1 (tree base, unsigned int *alignment_out)
{
  if (TREE_CODE (base) != SSA_NAME || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  gimple *def = SSA_NAME_DEF_STMT (base);
  base = analyze_scalar_evolution (loop_containing_stmt (def), base);

  /* Peel chrecs and record the minimum alignment preserved by
     all steps.  */
  unsigned int alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  while (TREE_CODE (base) == POLYNOMIAL_CHREC)
    {
      unsigned int step_alignment = highest_pow2_factor (CHREC_RIGHT (base));
      alignment = MIN (alignment, step_alignment);
      base = CHREC_LEFT (base);
    }

  /* Punt if the expression is too complicated to handle.  */
  if (tree_contains_chrecs (base, NULL) || !POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL_TREE;

  /* The only useful cases are those for which a dereference folds to something
     other than an INDIRECT_REF.  */
  tree ref_type = TREE_TYPE (TREE_TYPE (base));
  tree ref = fold_indirect_ref_1 (UNKNOWN_LOCATION, ref_type, base);
  if (!ref)
    return NULL_TREE;

  /* Analyze the base to which the steps we peeled were applied.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset;
  base = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
			      &unsignedp, &reversep, &volatilep);
  if (!base || !multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return NULL_TREE;

  /* Restrict the alignment to that guaranteed by the offsets.  */
  unsigned int bytepos_alignment = known_alignment (bytepos);
  if (bytepos_alignment != 0)
    alignment = MIN (alignment, bytepos_alignment);
  if (offset)
    {
      unsigned int offset_alignment = highest_pow2_factor (offset);
      alignment = MIN (alignment, offset_alignment);
    }

  *alignment_out = alignment;
  return base;
}
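
/* Worked example for the function above (illustrative only): if BASE
   analyzes to the chrec {&x.f, +, 8}_1, the peeling loop records the
   step alignment highest_pow2_factor (8) == 8 and leaves &x.f.
   Dereferencing folds to x.f, and get_inner_reference then reports X
   as the base object together with the byte position of field F; the
   final alignment is the minimum of 8 and the alignment guaranteed by
   that byte position.  */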
/* Return the object whose alignment would need to be changed in order
   to increase the alignment of ADDR.  Store the maximum achievable
   alignment in *MAX_ALIGNMENT.  */

tree
get_base_for_alignment (tree addr, unsigned int *max_alignment)
{
  tree base = get_base_for_alignment_1 (addr, max_alignment);
  if (base)
    return base;

  if (TREE_CODE (addr) == ADDR_EXPR)
    addr = TREE_OPERAND (addr, 0);
  *max_alignment = MAX_OFILE_ALIGNMENT / BITS_PER_UNIT;
  return addr;
}
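
/* Illustrative sketch (hypothetical helper): a client that wants ADDR
   aligned to WANT bytes can use the function above to find the object
   whose alignment would have to be raised, and to check that WANT does
   not exceed the achievable maximum.  Only a declaration can actually
   be realigned, hence the DECL_P test.  */

static bool
example_can_align_addr_p (tree addr, unsigned int want)
{
  unsigned int max_alignment;
  tree base = get_base_for_alignment (addr, &max_alignment);
  return DECL_P (base) && want <= max_alignment;
}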
/* Recursive helper function.  */

static bool
find_loop_nest_1 (class loop *loop, vec<loop_p> *loop_nest)
{
  /* Inner loops of the nest should not contain siblings.  Example:
     when there are two consecutive loops,

     | loop_0
     |   loop_1
     |     A[{0, +, 1}_1]
     |   endloop_1
     |   loop_2
     |     A[{0, +, 1}_2]
     |   endloop_2
     | endloop_0

     the dependence relation cannot be captured by the distance
     abstraction.  */
  if (loop->next)
    return false;

  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
/* Return false when the LOOP is not well nested.  Otherwise return
   true and insert in LOOP_NEST the loops of the nest.  LOOP_NEST will
   contain the loops from the outermost to the innermost, as they will
   appear in the classic distance vector.  */

bool
find_loop_nest (class loop *loop, vec<loop_p> *loop_nest)
{
  loop_nest->safe_push (loop);
  if (loop->inner)
    return find_loop_nest_1 (loop->inner, loop_nest);
  return true;
}
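
/* Illustrative sketch (hypothetical helper): depth of the nest rooted
   at LOOP, or 0 when the nest is not well formed.  On success NEST[0]
   is LOOP itself and NEST.last () the innermost loop, in the order
   used by the classic distance vectors.  */

static unsigned
example_loop_nest_depth (class loop *loop)
{
  auto_vec<loop_p, 3> nest;
  if (!find_loop_nest (loop, &nest))
    return 0;
  return nest.length ();
}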
/* Returns true when the data dependences have been computed, false otherwise.
   Given a loop nest LOOP, the following vectors are returned:
   DATAREFS is initialized to all the array elements contained in this loop,
   DEPENDENCE_RELATIONS contains the relations between the data references.
   Compute read-read and self relations if
   COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE.  */

bool
compute_data_dependences_for_loop (class loop *loop,
				   bool compute_self_and_read_read_dependences,
				   vec<loop_p> *loop_nest,
				   vec<data_reference_p> *datarefs,
				   vec<ddr_p> *dependence_relations)
{
  bool res = true;

  memset (&dependence_stats, 0, sizeof (dependence_stats));

  /* If the loop nest is not well formed, or one of the data references
     is not computable, give up without spending time to compute other
     dependences.  */
  if (!loop
      || !find_loop_nest (loop, loop_nest)
      || find_data_references_in_loop (loop, datarefs) == chrec_dont_know
      || !compute_all_dependences (*datarefs, dependence_relations, *loop_nest,
				   compute_self_and_read_read_dependences))
    res = false;

  if (dump_file && (dump_flags & TDF_STATS))
    {
      fprintf (dump_file, "Dependence tester statistics:\n");

      fprintf (dump_file, "Number of dependence tests: %d\n",
	       dependence_stats.num_dependence_tests);
      fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
	       dependence_stats.num_dependence_dependent);
      fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
	       dependence_stats.num_dependence_independent);
      fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
	       dependence_stats.num_dependence_undetermined);

      fprintf (dump_file, "Number of subscript tests: %d\n",
	       dependence_stats.num_subscript_tests);
      fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
	       dependence_stats.num_subscript_undetermined);
      fprintf (dump_file, "Number of same subscript function: %d\n",
	       dependence_stats.num_same_subscript_function);

      fprintf (dump_file, "Number of ziv tests: %d\n",
	       dependence_stats.num_ziv);
      fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
	       dependence_stats.num_ziv_dependent);
      fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
	       dependence_stats.num_ziv_independent);
      fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
	       dependence_stats.num_ziv_unimplemented);

      fprintf (dump_file, "Number of siv tests: %d\n",
	       dependence_stats.num_siv);
      fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
	       dependence_stats.num_siv_dependent);
      fprintf (dump_file, "Number of siv tests returning independent: %d\n",
	       dependence_stats.num_siv_independent);
      fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
	       dependence_stats.num_siv_unimplemented);

      fprintf (dump_file, "Number of miv tests: %d\n",
	       dependence_stats.num_miv);
      fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
	       dependence_stats.num_miv_dependent);
      fprintf (dump_file, "Number of miv tests returning independent: %d\n",
	       dependence_stats.num_miv_independent);
      fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
	       dependence_stats.num_miv_unimplemented);
    }

  return res;
}
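
/* Illustrative sketch (hypothetical helper, not part of the original
   file): a minimal driver for the function above that asks whether
   LOOP is free of memory dependences, using the cleanup routines
   defined below.  DDR_ARE_DEPENDENT is chrec_known exactly when the
   two references were proved independent.  */

static bool
example_loop_is_dependence_free_p (class loop *loop)
{
  auto_vec<loop_p, 3> loop_nest;
  auto_vec<data_reference_p, 10> datarefs;
  auto_vec<ddr_p, 10> dependence_relations;

  bool ok = compute_data_dependences_for_loop (loop, false, &loop_nest,
					       &datarefs,
					       &dependence_relations);
  if (ok)
    for (ddr_p ddr : dependence_relations)
      if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
	{
	  ok = false;
	  break;
	}

  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);
  return ok;
}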
/* Free the memory used by a data dependence relation DDR.  */

void
free_dependence_relation (struct data_dependence_relation *ddr)
{
  if (ddr == NULL)
    return;

  if (DDR_SUBSCRIPTS (ddr).exists ())
    free_subscripts (DDR_SUBSCRIPTS (ddr));
  DDR_DIST_VECTS (ddr).release ();
  DDR_DIR_VECTS (ddr).release ();

  free (ddr);
}
/* Free the memory used by the data dependence relations from
   DEPENDENCE_RELATIONS.  */

void
free_dependence_relations (vec<ddr_p>& dependence_relations)
{
  for (data_dependence_relation *ddr : dependence_relations)
    if (ddr)
      free_dependence_relation (ddr);

  dependence_relations.release ();
}
/* Free the memory used by the data references from DATAREFS.  */

void
free_data_refs (vec<data_reference_p>& datarefs)
{
  for (data_reference *dr : datarefs)
    free_data_ref (dr);
  datarefs.release ();
}
/* Common routine implementing both dr_direction_indicator and
   dr_zero_step_indicator.  Return USEFUL_MIN if the indicator is known
   to be >= USEFUL_MIN and -1 if the indicator is known to be negative.
   Return the step as the indicator otherwise.  */

static tree
dr_step_indicator (struct data_reference *dr, int useful_min)
{
  tree step = DR_STEP (dr);
  if (!step)
    return NULL_TREE;
  STRIP_NOPS (step);
  /* Look for cases where the step is scaled by a positive constant
     integer, which will often be the access size.  If the multiplication
     doesn't change the sign (due to overflow effects) then we can
     test the unscaled value instead.  */
  if (TREE_CODE (step) == MULT_EXPR
      && TREE_CODE (TREE_OPERAND (step, 1)) == INTEGER_CST
      && tree_int_cst_sgn (TREE_OPERAND (step, 1)) > 0)
    {
      tree factor = TREE_OPERAND (step, 1);
      step = TREE_OPERAND (step, 0);

      /* Strip widening and truncating conversions as well as nops.  */
      if (CONVERT_EXPR_P (step)
	  && INTEGRAL_TYPE_P (TREE_TYPE (TREE_OPERAND (step, 0))))
	step = TREE_OPERAND (step, 0);
      tree type = TREE_TYPE (step);

      /* Get the range of step values that would not cause overflow.  */
      widest_int minv = (wi::to_widest (TYPE_MIN_VALUE (ssizetype))
			 / wi::to_widest (factor));
      widest_int maxv = (wi::to_widest (TYPE_MAX_VALUE (ssizetype))
			 / wi::to_widest (factor));

      /* Get the range of values that the unconverted step actually has.  */
      wide_int step_min, step_max;
      value_range vr;
      if (TREE_CODE (step) != SSA_NAME
	  || !get_range_query (cfun)->range_of_expr (vr, step)
	  || vr.undefined_p ())
	{
	  step_min = wi::to_wide (TYPE_MIN_VALUE (type));
	  step_max = wi::to_wide (TYPE_MAX_VALUE (type));
	}
      else
	{
	  step_min = vr.lower_bound ();
	  step_max = vr.upper_bound ();
	}

      /* Check whether the unconverted step has an acceptable range.  */
      signop sgn = TYPE_SIGN (type);
      if (wi::les_p (minv, widest_int::from (step_min, sgn))
	  && wi::ges_p (maxv, widest_int::from (step_max, sgn)))
	{
	  if (wi::ge_p (step_min, useful_min, sgn))
	    return ssize_int (useful_min);
	  else if (wi::lt_p (step_max, 0, sgn))
	    return ssize_int (-1);
	  else
	    return fold_convert (ssizetype, step);
	}
    }
  return DR_STEP (dr);
}
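
/* Worked example for the scaling case above (illustrative values
   only): for a step of the form (ssizetype) n * 4, where n is a
   32-bit quantity with known range [0, 100], the factor 4 cannot make
   the product overflow ssizetype, so the test is done on n itself.
   Since the minimum 0 satisfies 0 >= USEFUL_MIN for USEFUL_MIN == 0,
   the caller dr_direction_indicator would receive ssize_int (0)
   instead of the opaque product.  */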
/* Return a value that is negative iff DR has a negative step.  */

tree
dr_direction_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 0);
}
/* Return a value that is zero iff DR has a zero step.  */

tree
dr_zero_step_indicator (struct data_reference *dr)
{
  return dr_step_indicator (dr, 1);
}
/* Return true if DR is known to have a nonnegative (but possibly zero)
   step.  */

bool
dr_known_forward_stride_p (struct data_reference *dr)
{
  tree indicator = dr_direction_indicator (dr);
  tree neg_step_val = fold_binary (LT_EXPR, boolean_type_node,
				   fold_convert (ssizetype, indicator),
				   ssize_int (0));
  return neg_step_val && integer_zerop (neg_step_val);
}
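
/* Worked example for the predicate above (illustrative): for
   indicator == ssize_int (-1), fold_binary simplifies "-1 < 0" to
   boolean_true_node and the function returns false; for
   indicator == ssize_int (0) it simplifies to boolean_false_node and
   the function returns true; for a symbolic indicator fold_binary
   returns NULL_TREE, so the function conservatively returns false,
   treating the stride as possibly backward.  */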