gcc/tree-ssa-loop-ivopts.cc
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and enable us to decide
66 costs more precisely, but getting all the interactions right would
67 be complicated.
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will likely be transformed by the RTL doloop optimization,
71 trying to make the selected IV candidate set optimal. The process of
72 doloop support includes:
74 1) Analyze whether the current loop will be transformed into a doloop;
75 if so, find and mark its compare type IV use as a doloop use (iv_group
76 field doloop_p), and set flag doloop_use_p of ivopts_data to notify
77 subsequent processing. See analyze_and_mark_doloop_use and its callees
78 for the details. The target hook predict_doloop_p does target specific checks.
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 with flag doloop_p of the iv_cand set; its step cost is set to zero and,
82 like a biv, it gets no extra cost. For cost determination between the doloop
83 IV cand and an IV use, the target hooks doloop_cost_for_generic and
84 doloop_cost_for_address are provided to add extra costs for generic type
85 and address type IV uses. Zero cost is assigned to the pair of the doloop IV
86 cand and the doloop IV use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm proceeds as usual, picking the doloop dedicated IV
90 if profitable. */
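/* A minimal illustrative sketch (hypothetical loop, not taken from the code
   below): for

     for (i = 0; i < n; i++)
       sum += a[i];

   the address computation for a[i] and the comparison i < n are the
   interesting uses; the candidates include the original biv i as well as a
   pointer iv p with base &a[0] and step sizeof (*a).  If the pointer
   candidate is selected, the loop is in effect rewritten as

     for (p = &a[0]; p < &a[0] + n; p++)
       sum += *p;

   i.e. the address computation is strength-reduced and the counter i is
   eliminated by expressing the exit test in terms of p.  */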
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
136 /* For lang_hooks.types.type_for_mode. */
137 #include "langhooks.h"
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
143 /* The infinite cost. */
144 #define INFTY 1000000000
146 /* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
156 niter = likely_max_stmt_executions_int (loop);
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
162 return niter;
165 struct iv_use;
167 /* Representation of the induction variable. */
168 struct iv
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* A memory object to which the induction variable points. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 struct iv_use *nonlin_use; /* The nonlinear use recorded for this iv, if any. */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use;/* For biv, indicate if it's used in any address
178 type use. */
181 /* Per-ssa version information (induction variable descriptions, etc.). */
182 struct version_info
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
192 /* Types of uses. */
193 enum use_type
195 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
196 USE_REF_ADDRESS, /* Use is an address for an explicit memory
197 reference. */
198 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
199 cases where the expansion of the function
200 will turn the argument into a normal address. */
201 USE_COMPARE /* Use is a compare. */
204 /* Cost of a computation. */
205 class comp_cost
207 public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
227 /* Subtracts constant C from this comp_cost. */
228 comp_cost operator-= (HOST_WIDE_INT c);
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
236 /* Subtracts cost COST2 from COST1. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
248 /* Returns true if COST1 is smaller than or equal to COST2. */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 bool
263 comp_cost::infinite_cost_p ()
265 return cost == INFTY;
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
278 return cost1;
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
293 return cost1;
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
299 *this = *this + cost;
300 return *this;
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
306 if (c >= INFTY)
307 this->cost = INFTY;
309 if (infinite_cost_p ())
310 return *this;
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
315 return *this;
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
321 if (infinite_cost_p ())
322 return *this;
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
327 return *this;
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
337 this->cost /= c;
339 return *this;
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
345 if (infinite_cost_p ())
346 return *this;
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
351 return *this;
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
357 *this = *this - cost;
358 return *this;
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
367 return cost1.cost < cost2.cost;
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
380 return cost1 < cost2 || cost1 == cost2;
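/* Note that the comparisons above order costs primarily by the runtime cost
   field and use complexity only as a tie-breaker; for instance
   comp_cost (4, 2) < comp_cost (5, 0) and comp_cost (4, 1) < comp_cost (4, 2).  */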
383 struct iv_inv_expr_ent;
385 /* The candidate - cost pair. */
386 class cost_pair
388 public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
401 /* Use. */
402 struct iv_use
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64 addr_offset;
416 /* Const offset stripped from base address. */
419 /* Group of uses. */
420 struct iv_group
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
431 /* The costs w.r.t. the iv candidates. */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
440 /* The position where the iv is computed. */
441 enum iv_position
443 IP_NORMAL, /* At the end, just before the exit condition. */
444 IP_END, /* At the end of the latch block. */
445 IP_BEFORE_USE, /* Immediately before a specific use. */
446 IP_AFTER_USE, /* Immediately after a specific use. */
447 IP_ORIGINAL /* The original biv. */
450 /* The induction variable candidate. */
451 struct iv_cand
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 bool involves_undefs; /* Whether the IV involves undefined values. */
457 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
458 gimple *incremented_at;/* For original biv, the statement where it is
459 incremented. */
460 tree var_before; /* The variable used for it before increment. */
461 tree var_after; /* The variable used for it after increment. */
462 struct iv *iv; /* The value of the candidate. NULL for
463 "pseudocandidate" used to indicate the possibility
464 to replace the final value of an iv by direct
465 computation of the value. */
466 unsigned cost; /* Cost of the candidate. */
467 unsigned cost_step; /* Cost of the candidate's increment operation. */
468 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 where it is incremented. */
470 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
471 iv_cand. */
472 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
473 handle it as a new invariant expression which will
474 be hoisted out of loop. */
475 struct iv *orig_iv; /* The original iv if this cand is added from biv with
476 smaller type. */
477 bool doloop_p; /* Whether this is a doloop candidate. */
480 /* Hashtable entry for common candidate derived from iv uses. */
481 class iv_common_cand
483 public:
484 tree base;
485 tree step;
486 /* IV uses from which this common candidate is derived. */
487 auto_vec<struct iv_use *> uses;
488 hashval_t hash;
491 /* Hashtable helpers. */
493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 static inline hashval_t hash (const iv_common_cand *);
496 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
499 /* Hash function for possible common candidates. */
501 inline hashval_t
502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 return ccand->hash;
507 /* Hash table equality function for common candidates. */
509 inline bool
510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 const iv_common_cand *ccand2)
513 return (ccand1->hash == ccand2->hash
514 && operand_equal_p (ccand1->base, ccand2->base, 0)
515 && operand_equal_p (ccand1->step, ccand2->step, 0)
516 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
520 /* Loop invariant expression hashtable entry. */
522 struct iv_inv_expr_ent
524 /* Tree expression of the entry. */
525 tree expr;
526 /* Unique identifier. */
527 int id;
528 /* Hash value. */
529 hashval_t hash;
532 /* Sort iv_inv_expr_ent pair A and B by id field. */
534 static int
535 sort_iv_inv_expr_ent (const void *a, const void *b)
537 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 unsigned id1 = (*e1)->id;
541 unsigned id2 = (*e2)->id;
543 if (id1 < id2)
544 return -1;
545 else if (id1 > id2)
546 return 1;
547 else
548 return 0;
551 /* Hashtable helpers. */
553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 static inline hashval_t hash (const iv_inv_expr_ent *);
556 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
559 /* Return true if uses of type TYPE represent some form of address. */
561 inline bool
562 address_p (use_type type)
564 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
567 /* Hash function for loop invariant expressions. */
569 inline hashval_t
570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 return expr->hash;
575 /* Hash table equality function for expressions. */
577 inline bool
578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 const iv_inv_expr_ent *expr2)
581 return expr1->hash == expr2->hash
582 && operand_equal_p (expr1->expr, expr2->expr, 0);
585 struct ivopts_data
587 /* The currently optimized loop. */
588 class loop *current_loop;
589 location_t loop_loc;
591 /* Numbers of iterations for all exits of the current loop. */
592 hash_map<edge, tree_niter_desc *> *niters;
594 /* Number of registers used in it. */
595 unsigned regs_used;
597 /* The size of version_info array allocated. */
598 unsigned version_info_size;
600 /* The array of information for the ssa names. */
601 struct version_info *version_info;
603 /* The hashtable of loop invariant expressions created
604 by ivopt. */
605 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 /* The bitmap of indices in version_info whose value was changed. */
608 bitmap relevant;
610 /* The uses of induction variables. */
611 vec<iv_group *> vgroups;
613 /* The candidates. */
614 vec<iv_cand *> vcands;
616 /* A bitmap of important candidates. */
617 bitmap important_candidates;
619 /* Cache used by tree_to_aff_combination_expand. */
620 hash_map<tree, name_expansion *> *name_expansion_cache;
622 /* The hashtable of common candidates derived from iv uses. */
623 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 /* The common candidates. */
626 vec<iv_common_cand *> iv_common_cands;
628 /* Hash map recording base object information of tree exp. */
629 hash_map<tree, tree> *base_object_map;
631 /* The maximum invariant variable id. */
632 unsigned max_inv_var_id;
634 /* The maximum invariant expression id. */
635 unsigned max_inv_expr_id;
637 /* Number of no_overflow BIVs which are not used in memory address. */
638 unsigned bivs_not_used_in_addr;
640 /* Obstack for iv structure. */
641 struct obstack iv_obstack;
643 /* Whether to consider just related and important candidates when replacing a
644 use. */
645 bool consider_all_candidates;
647 /* Are we optimizing for speed? */
648 bool speed;
650 /* Whether the loop body includes any function calls. */
651 bool body_includes_call;
653 /* Whether the loop body can only be exited via single exit. */
654 bool loop_single_exit_p;
656 /* Whether the loop has doloop comparison use. */
657 bool doloop_use_p;
660 /* An assignment of iv candidates to uses. */
662 class iv_ca
664 public:
665 /* The number of uses covered by the assignment. */
666 unsigned upto;
668 /* Number of uses that cannot be expressed by the candidates in the set. */
669 unsigned bad_groups;
671 /* Candidate assigned to a use, together with the related costs. */
672 class cost_pair **cand_for_group;
674 /* Number of times each candidate is used. */
675 unsigned *n_cand_uses;
677 /* The candidates used. */
678 bitmap cands;
680 /* The number of candidates in the set. */
681 unsigned n_cands;
683 /* The number of invariants needed, including both invariant variables and
684 invariant expressions. */
685 unsigned n_invs;
687 /* Total cost of expressing uses. */
688 comp_cost cand_use_cost;
690 /* Total cost of candidates. */
691 int64_t cand_cost;
693 /* Number of times each invariant variable is used. */
694 unsigned *n_inv_var_uses;
696 /* Number of times each invariant expression is used. */
697 unsigned *n_inv_expr_uses;
699 /* Total cost of the assignment. */
700 comp_cost cost;
703 /* Difference of two iv candidate assignments. */
705 struct iv_ca_delta
707 /* Changed group. */
708 struct iv_group *group;
710 /* An old assignment (for rollback purposes). */
711 class cost_pair *old_cp;
713 /* A new assignment. */
714 class cost_pair *new_cp;
716 /* Next change in the list. */
717 struct iv_ca_delta *next;
720 /* Bound on number of candidates below which all candidates are considered. */
722 #define CONSIDER_ALL_CANDIDATES_BOUND \
723 ((unsigned) param_iv_consider_all_candidates_bound)
725 /* If there are more iv occurrences, we just give up (it is quite unlikely that
726 optimizing such a loop would help, and it would take ages). */
728 #define MAX_CONSIDERED_GROUPS \
729 ((unsigned) param_iv_max_considered_uses)
731 /* If there are at most this number of ivs in the set, always try removing
732 unnecessary ivs from the set. */
734 #define ALWAYS_PRUNE_CAND_SET_BOUND \
735 ((unsigned) param_iv_always_prune_cand_set_bound)
737 /* The list of trees for which the decl_rtl field must be reset is stored
738 here. */
740 static vec<tree> decl_rtl_to_reset;
742 static comp_cost force_expr_to_var_cost (tree, bool);
744 /* The single loop exit if it dominates the latch, NULL otherwise. */
746 edge
747 single_dom_exit (class loop *loop)
749 edge exit = single_exit (loop);
751 if (!exit)
752 return NULL;
754 if (!just_once_each_iteration_p (loop, exit->src))
755 return NULL;
757 return exit;
760 /* Dumps information about the induction variable IV to FILE. Don't dump
761 variable's name if DUMP_NAME is FALSE. The information is dumped with
762 preceding spaces indicated by INDENT_LEVEL. */
764 void
765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 const char *p;
768 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 if (indent_level > 4)
771 indent_level = 4;
772 p = spaces + 8 - (indent_level << 1);
774 fprintf (file, "%sIV struct:\n", p);
775 if (iv->ssa_name && dump_name)
777 fprintf (file, "%s SSA_NAME:\t", p);
778 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779 fprintf (file, "\n");
782 fprintf (file, "%s Type:\t", p);
783 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784 fprintf (file, "\n");
786 fprintf (file, "%s Base:\t", p);
787 print_generic_expr (file, iv->base, TDF_SLIM);
788 fprintf (file, "\n");
790 fprintf (file, "%s Step:\t", p);
791 print_generic_expr (file, iv->step, TDF_SLIM);
792 fprintf (file, "\n");
794 if (iv->base_object)
796 fprintf (file, "%s Object:\t", p);
797 print_generic_expr (file, iv->base_object, TDF_SLIM);
798 fprintf (file, "\n");
801 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
804 p, iv->no_overflow ? "No-overflow" : "Overflow");
807 /* Dumps information about the USE to FILE. */
809 void
810 dump_use (FILE *file, struct iv_use *use)
812 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
813 fprintf (file, " At stmt:\t");
814 print_gimple_stmt (file, use->stmt, 0);
815 fprintf (file, " At pos:\t");
816 if (use->op_p)
817 print_generic_expr (file, *use->op_p, TDF_SLIM);
818 fprintf (file, "\n");
819 dump_iv (file, use->iv, false, 2);
822 /* Dumps information about the uses to FILE. */
824 void
825 dump_groups (FILE *file, struct ivopts_data *data)
827 unsigned i, j;
828 struct iv_group *group;
830 for (i = 0; i < data->vgroups.length (); i++)
832 group = data->vgroups[i];
833 fprintf (file, "Group %d:\n", group->id);
834 if (group->type == USE_NONLINEAR_EXPR)
835 fprintf (file, " Type:\tGENERIC\n");
836 else if (group->type == USE_REF_ADDRESS)
837 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
838 else if (group->type == USE_PTR_ADDRESS)
839 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
840 else
842 gcc_assert (group->type == USE_COMPARE);
843 fprintf (file, " Type:\tCOMPARE\n");
845 for (j = 0; j < group->vuses.length (); j++)
846 dump_use (file, group->vuses[j]);
850 /* Dumps information about induction variable candidate CAND to FILE. */
852 void
853 dump_cand (FILE *file, struct iv_cand *cand)
855 struct iv *iv = cand->iv;
857 fprintf (file, "Candidate %d:\n", cand->id);
858 if (cand->inv_vars)
860 fprintf (file, " Depend on inv.vars: ");
861 dump_bitmap (file, cand->inv_vars);
863 if (cand->inv_exprs)
865 fprintf (file, " Depend on inv.exprs: ");
866 dump_bitmap (file, cand->inv_exprs);
869 if (cand->var_before)
871 fprintf (file, " Var befor: ");
872 print_generic_expr (file, cand->var_before, TDF_SLIM);
873 fprintf (file, "\n");
875 if (cand->var_after)
877 fprintf (file, " Var after: ");
878 print_generic_expr (file, cand->var_after, TDF_SLIM);
879 fprintf (file, "\n");
882 switch (cand->pos)
884 case IP_NORMAL:
885 fprintf (file, " Incr POS: before exit test\n");
886 break;
888 case IP_BEFORE_USE:
889 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
890 break;
892 case IP_AFTER_USE:
893 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
894 break;
896 case IP_END:
897 fprintf (file, " Incr POS: at end\n");
898 break;
900 case IP_ORIGINAL:
901 fprintf (file, " Incr POS: orig biv\n");
902 break;
905 dump_iv (file, iv, false, 1);
908 /* Returns the info for ssa version VER. */
910 static inline struct version_info *
911 ver_info (struct ivopts_data *data, unsigned ver)
913 return data->version_info + ver;
916 /* Returns the info for ssa name NAME. */
918 static inline struct version_info *
919 name_info (struct ivopts_data *data, tree name)
921 return ver_info (data, SSA_NAME_VERSION (name));
924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925 emitted in LOOP. */
927 static bool
928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 gcc_assert (bb);
934 if (sbb == loop->latch)
935 return true;
937 if (sbb != bb)
938 return false;
940 return stmt == last_nondebug_stmt (bb);
943 /* Returns true if STMT is after the place where the original induction
944 variable CAND is incremented. If TRUE_IF_EQUAL is set, we also return true
945 if the positions are identical. */
947 static bool
948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 basic_block cand_bb = gimple_bb (cand->incremented_at);
951 basic_block stmt_bb = gimple_bb (stmt);
953 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954 return false;
956 if (stmt_bb != cand_bb)
957 return true;
959 if (true_if_equal
960 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
961 return true;
962 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
965 /* Returns true if STMT is after the place where the induction variable
966 CAND is incremented in LOOP. */
968 static bool
969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 switch (cand->pos)
973 case IP_END:
974 return false;
976 case IP_NORMAL:
977 return stmt_after_ip_normal_pos (loop, stmt);
979 case IP_ORIGINAL:
980 case IP_AFTER_USE:
981 return stmt_after_inc_pos (cand, stmt, false);
983 case IP_BEFORE_USE:
984 return stmt_after_inc_pos (cand, stmt, true);
986 default:
987 gcc_unreachable ();
991 /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 static tree
994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 if (TREE_CODE (*tp) == SSA_NAME
997 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998 return *tp;
1000 if (!EXPR_P (*tp))
1001 *walk_subtrees = 0;
1003 return NULL_TREE;
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007 abnormal phi node. */
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1012 return walk_tree_without_duplicates
1013 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1016 /* Returns the structure describing number of iterations determined from
1017 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1022 class tree_niter_desc *desc;
1023 tree_niter_desc **slot;
1025 if (!data->niters)
1027 data->niters = new hash_map<edge, tree_niter_desc *>;
1028 slot = NULL;
1030 else
1031 slot = data->niters->get (exit);
1033 if (!slot)
1035 /* Try to determine number of iterations. We cannot safely work with ssa
1036 names that appear in phi nodes on abnormal edges, so that we do not
1037 create overlapping life ranges for them (PR 27283). */
1038 desc = XNEW (class tree_niter_desc);
1039 ::new (static_cast<void*> (desc)) tree_niter_desc ();
1040 if (!number_of_iterations_exit (data->current_loop,
1041 exit, desc, true)
1042 || contains_abnormal_ssa_name_p (desc->niter))
1044 desc->~tree_niter_desc ();
1045 XDELETE (desc);
1046 desc = NULL;
1048 data->niters->put (exit, desc);
1050 else
1051 desc = *slot;
1053 return desc;
1056 /* Returns the structure describing number of iterations determined from
1057 single dominating exit of DATA->current_loop, or NULL if something
1058 goes wrong. */
1060 static class tree_niter_desc *
1061 niter_for_single_dom_exit (struct ivopts_data *data)
1063 edge exit = single_dom_exit (data->current_loop);
1065 if (!exit)
1066 return NULL;
1068 return niter_for_exit (data, exit);
1071 /* Initializes data structures used by the iv optimization pass, stored
1072 in DATA. */
1074 static void
1075 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077 data->version_info_size = 2 * num_ssa_names;
1078 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1079 data->relevant = BITMAP_ALLOC (NULL);
1080 data->important_candidates = BITMAP_ALLOC (NULL);
1081 data->max_inv_var_id = 0;
1082 data->max_inv_expr_id = 0;
1083 data->niters = NULL;
1084 data->vgroups.create (20);
1085 data->vcands.create (20);
1086 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1087 data->name_expansion_cache = NULL;
1088 data->base_object_map = NULL;
1089 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1090 data->iv_common_cands.create (20);
1091 decl_rtl_to_reset.create (20);
1092 gcc_obstack_init (&data->iv_obstack);
1095 /* walk_tree callback for determine_base_object. */
1097 static tree
1098 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1100 tree_code code = TREE_CODE (*tp);
1101 tree obj = NULL_TREE;
1102 if (code == ADDR_EXPR)
1104 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1105 if (!base)
1106 obj = *tp;
1107 else if (TREE_CODE (base) != MEM_REF)
1108 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1110 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1111 obj = fold_convert (ptr_type_node, *tp);
1113 if (!obj)
1115 if (!EXPR_P (*tp))
1116 *walk_subtrees = 0;
1118 return NULL_TREE;
1120 /* Record special node for multiple base objects and stop. */
1121 if (*static_cast<tree *> (wdata))
1123 *static_cast<tree *> (wdata) = integer_zero_node;
1124 return integer_zero_node;
1126 /* Record the base object and continue looking. */
1127 *static_cast<tree *> (wdata) = obj;
1128 return NULL_TREE;
1131 /* Returns a memory object to which EXPR points, with caching. Return NULL if
1132 we are able to determine that it does not point to any such object; specially
1133 return integer_zero_node if EXPR contains multiple base objects. */
1135 static tree
1136 determine_base_object (struct ivopts_data *data, tree expr)
1138 tree *slot, obj = NULL_TREE;
1139 if (data->base_object_map)
1141 if ((slot = data->base_object_map->get(expr)) != NULL)
1142 return *slot;
1144 else
1145 data->base_object_map = new hash_map<tree, tree>;
1147 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1148 data->base_object_map->put (expr, obj);
1149 return obj;
1152 /* Return true if address expression with non-DECL_P operand appears
1153 in EXPR. */
1155 static bool
1156 contain_complex_addr_expr (tree expr)
1158 bool res = false;
1160 STRIP_NOPS (expr);
1161 switch (TREE_CODE (expr))
1163 case POINTER_PLUS_EXPR:
1164 case PLUS_EXPR:
1165 case MINUS_EXPR:
1166 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1167 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1168 break;
1170 case ADDR_EXPR:
1171 return (!DECL_P (TREE_OPERAND (expr, 0)));
1173 default:
1174 return false;
1177 return res;
1180 /* Allocates an induction variable with given initial value BASE and step STEP
1181 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1183 static struct iv *
1184 alloc_iv (struct ivopts_data *data, tree base, tree step,
1185 bool no_overflow = false)
1187 tree expr = base;
1188 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1189 sizeof (struct iv));
1190 gcc_assert (step != NULL_TREE);
1192 /* Canonicalize the address expression in BASE as if it were an unsigned
1193 computation. That leads to more equalities being detected and results in:
1195 1) More accurate cost can be computed for address expressions;
1196 2) Duplicate candidates won't be created for bases in different
1197 forms, like &a[0] and &a.
1198 3) Duplicate candidates won't be created for IV expressions that differ
1199 only in their sign. */
1200 aff_tree comb;
1201 STRIP_NOPS (expr);
1202 expr = fold_convert (unsigned_type_for (TREE_TYPE (expr)), expr);
1203 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1204 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1206 iv->base = base;
1207 iv->base_object = determine_base_object (data, base);
1208 iv->step = step;
1209 iv->biv_p = false;
1210 iv->nonlin_use = NULL;
1211 iv->ssa_name = NULL_TREE;
1212 if (!no_overflow
1213 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1214 base, step))
1215 no_overflow = true;
1216 iv->no_overflow = no_overflow;
1217 iv->have_address_use = false;
1219 return iv;
1222 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1223 doesn't overflow. */
1225 static void
1226 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1227 bool no_overflow)
1229 struct version_info *info = name_info (data, iv);
1231 gcc_assert (!info->iv);
1233 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1234 info->iv = alloc_iv (data, base, step, no_overflow);
1235 info->iv->ssa_name = iv;
1238 /* Finds induction variable declaration for VAR. */
1240 static struct iv *
1241 get_iv (struct ivopts_data *data, tree var)
1243 basic_block bb;
1244 tree type = TREE_TYPE (var);
1246 if (!POINTER_TYPE_P (type)
1247 && !INTEGRAL_TYPE_P (type))
1248 return NULL;
1250 if (!name_info (data, var)->iv)
1252 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1254 if (!bb
1255 || !flow_bb_inside_loop_p (data->current_loop, bb))
1257 if (POINTER_TYPE_P (type))
1258 type = sizetype;
1259 set_iv (data, var, var, build_int_cst (type, 0), true);
1263 return name_info (data, var)->iv;
1266 /* Return the first non-invariant ssa var found in EXPR. */
1268 static tree
1269 extract_single_var_from_expr (tree expr)
1271 int i, n;
1272 tree tmp;
1273 enum tree_code code;
1275 if (!expr || is_gimple_min_invariant (expr))
1276 return NULL;
1278 code = TREE_CODE (expr);
1279 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1281 n = TREE_OPERAND_LENGTH (expr);
1282 for (i = 0; i < n; i++)
1284 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1286 if (tmp)
1287 return tmp;
1290 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
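/* In the terminology used below, a basic iv (biv) is one defined by a PHI
   node in the loop header, e.g. the counter i of "for (i = 0; i < n; i++)",
   while a general iv (giv) is derived from a biv by a statement inside the
   loop, e.g. an address computed as base + 4 * i.  */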
1293 /* Finds basic ivs. */
1295 static bool
1296 find_bivs (struct ivopts_data *data)
1298 gphi *phi;
1299 affine_iv iv;
1300 tree step, type, base, stop;
1301 bool found = false;
1302 class loop *loop = data->current_loop;
1303 gphi_iterator psi;
1305 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1307 phi = psi.phi ();
1309 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1310 continue;
1312 if (virtual_operand_p (PHI_RESULT (phi)))
1313 continue;
1315 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1316 continue;
1318 if (integer_zerop (iv.step))
1319 continue;
1321 step = iv.step;
1322 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1323 /* Stop expanding the iv base at the first ssa var referred to by the iv
1324 step. Ideally we would stop at any ssa var, but since that is expensive
1325 and such cases are unusual, we just do it for the first one.
1327 See PR64705 for the rationale. */
1328 stop = extract_single_var_from_expr (step);
1329 base = expand_simple_operations (base, stop);
1330 if (contains_abnormal_ssa_name_p (base)
1331 || contains_abnormal_ssa_name_p (step))
1332 continue;
1334 type = TREE_TYPE (PHI_RESULT (phi));
1335 base = fold_convert (type, base);
1336 if (step)
1338 if (POINTER_TYPE_P (type))
1339 step = convert_to_ptrofftype (step);
1340 else
1341 step = fold_convert (type, step);
1344 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1345 found = true;
1348 return found;
1351 /* Marks basic ivs. */
1353 static void
1354 mark_bivs (struct ivopts_data *data)
1356 gphi *phi;
1357 gimple *def;
1358 tree var;
1359 struct iv *iv, *incr_iv;
1360 class loop *loop = data->current_loop;
1361 basic_block incr_bb;
1362 gphi_iterator psi;
1364 data->bivs_not_used_in_addr = 0;
1365 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1367 phi = psi.phi ();
1369 iv = get_iv (data, PHI_RESULT (phi));
1370 if (!iv)
1371 continue;
1373 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1374 def = SSA_NAME_DEF_STMT (var);
1375 /* Don't mark iv peeled from other one as biv. */
1376 if (def
1377 && gimple_code (def) == GIMPLE_PHI
1378 && gimple_bb (def) == loop->header)
1379 continue;
1381 incr_iv = get_iv (data, var);
1382 if (!incr_iv)
1383 continue;
1385 /* If the increment is in the subloop, ignore it. */
1386 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1387 if (incr_bb->loop_father != data->current_loop
1388 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1389 continue;
1391 iv->biv_p = true;
1392 incr_iv->biv_p = true;
1393 if (iv->no_overflow)
1394 data->bivs_not_used_in_addr++;
1395 if (incr_iv->no_overflow)
1396 data->bivs_not_used_in_addr++;
1400 /* Checks whether STMT defines a linear induction variable and stores its
1401 parameters to IV. */
1403 static bool
1404 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1406 tree lhs, stop;
1407 class loop *loop = data->current_loop;
1409 iv->base = NULL_TREE;
1410 iv->step = NULL_TREE;
1412 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1413 return false;
1415 lhs = gimple_assign_lhs (stmt);
1416 if (TREE_CODE (lhs) != SSA_NAME)
1417 return false;
1419 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1420 return false;
1422 /* Stop expanding the iv base at the first ssa var referred to by the iv
1423 step. Ideally we would stop at any ssa var, but since that is expensive
1424 and such cases are unusual, we just do it for the first one.
1426 See PR64705 for the rationale. */
1427 stop = extract_single_var_from_expr (iv->step);
1428 iv->base = expand_simple_operations (iv->base, stop);
1429 if (contains_abnormal_ssa_name_p (iv->base)
1430 || contains_abnormal_ssa_name_p (iv->step))
1431 return false;
1433 /* If STMT could throw, then do not consider STMT as defining a GIV.
1434 While this will suppress optimizations, we cannot safely delete this
1435 GIV and associated statements, even if it appears it is not used. */
1436 if (stmt_could_throw_p (cfun, stmt))
1437 return false;
1439 return true;
1442 /* Finds general ivs in statement STMT. */
1444 static void
1445 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1447 affine_iv iv;
1449 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1450 return;
1452 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1455 /* Finds general ivs in basic block BB. */
1457 static void
1458 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1460 gimple_stmt_iterator bsi;
1462 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1463 if (!is_gimple_debug (gsi_stmt (bsi)))
1464 find_givs_in_stmt (data, gsi_stmt (bsi));
1467 /* Finds general ivs. */
1469 static void
1470 find_givs (struct ivopts_data *data, basic_block *body)
1472 class loop *loop = data->current_loop;
1473 unsigned i;
1475 for (i = 0; i < loop->num_nodes; i++)
1476 find_givs_in_bb (data, body[i]);
1479 /* For each ssa name defined in LOOP determines whether it is an induction
1480 variable and if so, its initial value and step. */
1482 static bool
1483 find_induction_variables (struct ivopts_data *data, basic_block *body)
1485 unsigned i;
1486 bitmap_iterator bi;
1488 if (!find_bivs (data))
1489 return false;
1491 find_givs (data, body);
1492 mark_bivs (data);
1494 if (dump_file && (dump_flags & TDF_DETAILS))
1496 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1498 if (niter)
1500 fprintf (dump_file, " number of iterations ");
1501 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1502 if (!integer_zerop (niter->may_be_zero))
1504 fprintf (dump_file, "; zero if ");
1505 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1507 fprintf (dump_file, "\n");
1510 fprintf (dump_file, "\n<Induction Vars>:\n");
1511 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1513 struct version_info *info = ver_info (data, i);
1514 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1515 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1519 return true;
1522 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1523 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1524 is the const offset stripped from IV base and MEM_TYPE is the type
1525 of the memory being addressed. For uses of other types, ADDR_BASE
1526 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1528 static struct iv_use *
1529 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1530 gimple *stmt, enum use_type type, tree mem_type,
1531 tree addr_base, poly_uint64 addr_offset)
1533 struct iv_use *use = XCNEW (struct iv_use);
1535 use->id = group->vuses.length ();
1536 use->group_id = group->id;
1537 use->type = type;
1538 use->mem_type = mem_type;
1539 use->iv = iv;
1540 use->stmt = stmt;
1541 use->op_p = use_p;
1542 use->addr_base = addr_base;
1543 use->addr_offset = addr_offset;
1545 group->vuses.safe_push (use);
1546 return use;
1549 /* Checks whether OP is a loop-level invariant and if so, records it.
1550 NONLINEAR_USE is true if the invariant is used in a way we do not
1551 handle specially. */
1553 static void
1554 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1556 basic_block bb;
1557 struct version_info *info;
1559 if (TREE_CODE (op) != SSA_NAME
1560 || virtual_operand_p (op))
1561 return;
1563 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1564 if (bb
1565 && flow_bb_inside_loop_p (data->current_loop, bb))
1566 return;
1568 info = name_info (data, op);
1569 info->name = op;
1570 info->has_nonlin_use |= nonlinear_use;
1571 if (!info->inv_id)
1572 info->inv_id = ++data->max_inv_var_id;
1573 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1576 /* Record a group of TYPE. */
1578 static struct iv_group *
1579 record_group (struct ivopts_data *data, enum use_type type)
1581 struct iv_group *group = XCNEW (struct iv_group);
1583 group->id = data->vgroups.length ();
1584 group->type = type;
1585 group->related_cands = BITMAP_ALLOC (NULL);
1586 group->vuses.create (1);
1587 group->doloop_p = false;
1589 data->vgroups.safe_push (group);
1590 return group;
1593 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1594 New group will be created if there is no existing group for the use.
1595 MEM_TYPE is the type of memory being addressed, or NULL if this
1596 isn't an address reference. */
1598 static struct iv_use *
1599 record_group_use (struct ivopts_data *data, tree *use_p,
1600 struct iv *iv, gimple *stmt, enum use_type type,
1601 tree mem_type)
1603 tree addr_base = NULL;
1604 struct iv_group *group = NULL;
1605 poly_uint64 addr_offset = 0;
1607 /* Record non address type use in a new group. */
1608 if (address_p (type))
1610 unsigned int i;
1612 gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1613 tree addr_toffset;
1614 split_constant_offset (iv->base, &addr_base, &addr_toffset);
1615 addr_offset = int_cst_value (addr_toffset);
1616 for (i = 0; i < data->vgroups.length (); i++)
1618 struct iv_use *use;
1620 group = data->vgroups[i];
1621 use = group->vuses[0];
1622 if (!address_p (use->type))
1623 continue;
1625 /* Check if it has the same stripped base and step. */
1626 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1627 && operand_equal_p (iv->step, use->iv->step, OEP_ASSUME_WRAPV)
1628 && operand_equal_p (addr_base, use->addr_base, OEP_ASSUME_WRAPV))
1629 break;
1631 if (i == data->vgroups.length ())
1632 group = NULL;
1635 if (!group)
1636 group = record_group (data, type);
1638 return record_use (group, use_p, iv, stmt, type, mem_type,
1639 addr_base, addr_offset);
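/* For example, address uses of a[i] and a[i + 1] in the same loop normally
   end up in one group: they share the base object, the step and the stripped
   base address, and differ only in the recorded constant addr_offset
   (0 versus sizeof (*a)).  */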
1642 /* Checks whether the use OP is interesting and if so, records it. */
1644 static struct iv_use *
1645 find_interesting_uses_op (struct ivopts_data *data, tree op)
1647 struct iv *iv;
1648 gimple *stmt;
1649 struct iv_use *use;
1651 if (TREE_CODE (op) != SSA_NAME)
1652 return NULL;
1654 iv = get_iv (data, op);
1655 if (!iv)
1656 return NULL;
1658 if (iv->nonlin_use)
1660 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1661 return iv->nonlin_use;
1664 if (integer_zerop (iv->step))
1666 record_invariant (data, op, true);
1667 return NULL;
1670 stmt = SSA_NAME_DEF_STMT (op);
1671 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1673 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1674 iv->nonlin_use = use;
1675 return use;
1678 /* Indicate how compare type iv_use can be handled. */
1679 enum comp_iv_rewrite
1681 COMP_IV_NA,
1682 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1683 COMP_IV_EXPR,
1684 /* We may rewrite compare type iv_uses on both sides of comparison by
1685 expressing value of each iv_use. */
1686 COMP_IV_EXPR_2,
1687 /* We may rewrite compare type iv_use by expressing value of the iv_use
1688 or by eliminating it with other iv_cand. */
1689 COMP_IV_ELIM
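/* For instance, in "if (i < n)" with i an iv and n loop invariant, the use
   of i is classified as COMP_IV_ELIM, while "if (p < q)" with both p and q
   being ivs yields COMP_IV_EXPR_2.  */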
1692 /* Given a condition in statement STMT, checks whether it is a compare
1693 of an induction variable and an invariant. If this is the case,
1694 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1695 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1696 induction variable descriptions, and the way the condition can be
1697 rewritten is returned. Otherwise CONTROL_VAR and BOUND are set to the
1698 arguments of the condition and COMP_IV_NA is returned. */
1700 static enum comp_iv_rewrite
1701 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1702 tree **control_var, tree **bound,
1703 struct iv **iv_var, struct iv **iv_bound)
1705 /* The objects returned when COND has constant operands. */
1706 static struct iv const_iv;
1707 static tree zero;
1708 tree *op0 = &zero, *op1 = &zero;
1709 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1710 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1712 if (gimple_code (stmt) == GIMPLE_COND)
1714 gcond *cond_stmt = as_a <gcond *> (stmt);
1715 op0 = gimple_cond_lhs_ptr (cond_stmt);
1716 op1 = gimple_cond_rhs_ptr (cond_stmt);
1718 else
1720 op0 = gimple_assign_rhs1_ptr (stmt);
1721 op1 = gimple_assign_rhs2_ptr (stmt);
1724 zero = integer_zero_node;
1725 const_iv.step = integer_zero_node;
1727 if (TREE_CODE (*op0) == SSA_NAME)
1728 iv0 = get_iv (data, *op0);
1729 if (TREE_CODE (*op1) == SSA_NAME)
1730 iv1 = get_iv (data, *op1);
1732 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1733 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1735 rewrite_type = COMP_IV_EXPR_2;
1736 goto end;
1739 /* If neither side of the comparison is an IV. */
1740 if ((!iv0 || integer_zerop (iv0->step))
1741 && (!iv1 || integer_zerop (iv1->step)))
1742 goto end;
1744 /* Control variable may be on the other side. */
1745 if (!iv0 || integer_zerop (iv0->step))
1747 std::swap (op0, op1);
1748 std::swap (iv0, iv1);
1750 /* If one side is IV and the other side isn't loop invariant. */
1751 if (!iv1)
1752 rewrite_type = COMP_IV_EXPR;
1753 /* If one side is IV and the other side is loop invariant. */
1754 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1755 rewrite_type = COMP_IV_ELIM;
1757 end:
1758 if (control_var)
1759 *control_var = op0;
1760 if (iv_var)
1761 *iv_var = iv0;
1762 if (bound)
1763 *bound = op1;
1764 if (iv_bound)
1765 *iv_bound = iv1;
1767 return rewrite_type;
1770 /* Checks whether the condition in STMT is interesting and if so,
1771 records it. */
1773 static void
1774 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1776 tree *var_p, *bound_p;
1777 struct iv *var_iv, *bound_iv;
1778 enum comp_iv_rewrite ret;
1780 ret = extract_cond_operands (data, stmt,
1781 &var_p, &bound_p, &var_iv, &bound_iv);
1782 if (ret == COMP_IV_NA)
1784 find_interesting_uses_op (data, *var_p);
1785 find_interesting_uses_op (data, *bound_p);
1786 return;
1789 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1790 /* Record compare type iv_use for iv on the other side of comparison. */
1791 if (ret == COMP_IV_EXPR_2)
1792 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1795 /* Returns the outermost loop in which EXPR is obviously invariant,
1796 relative to the loop LOOP, i.e. the outermost loop such that all of
1797 EXPR's operands are defined outside of it. Returns NULL if EXPR is
1798 not even obviously invariant in LOOP. */
1800 class loop *
1801 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1803 basic_block def_bb;
1804 unsigned i, len;
1806 if (is_gimple_min_invariant (expr))
1807 return current_loops->tree_root;
1809 if (TREE_CODE (expr) == SSA_NAME)
1811 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1812 if (def_bb)
1814 if (flow_bb_inside_loop_p (loop, def_bb))
1815 return NULL;
1816 return superloop_at_depth (loop,
1817 loop_depth (def_bb->loop_father) + 1);
1820 return current_loops->tree_root;
1823 if (!EXPR_P (expr))
1824 return NULL;
1826 unsigned maxdepth = 0;
1827 len = TREE_OPERAND_LENGTH (expr);
1828 for (i = 0; i < len; i++)
1830 class loop *ivloop;
1831 if (!TREE_OPERAND (expr, i))
1832 continue;
1834 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1835 if (!ivloop)
1836 return NULL;
1837 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1840 return superloop_at_depth (loop, maxdepth);
1843 /* Returns true if expression EXPR is obviously invariant in LOOP,
1844 i.e. if all its operands are defined outside of the LOOP. LOOP
1845 should not be the function body. */
1847 bool
1848 expr_invariant_in_loop_p (class loop *loop, tree expr)
1850 basic_block def_bb;
1851 unsigned i, len;
1853 gcc_assert (loop_depth (loop) > 0);
1855 if (is_gimple_min_invariant (expr))
1856 return true;
1858 if (TREE_CODE (expr) == SSA_NAME)
1860 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1861 if (def_bb
1862 && flow_bb_inside_loop_p (loop, def_bb))
1863 return false;
1865 return true;
1868 if (!EXPR_P (expr))
1869 return false;
1871 len = TREE_OPERAND_LENGTH (expr);
1872 for (i = 0; i < len; i++)
1873 if (TREE_OPERAND (expr, i)
1874 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1875 return false;
1877 return true;
1880 /* Given expression EXPR which computes inductive values with respect
1881 to loop recorded in DATA, this function returns biv from which EXPR
1882 is derived by tracing definition chains of ssa variables in EXPR. */
1884 static struct iv*
1885 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1887 struct iv *iv;
1888 unsigned i, n;
1889 tree e2, e1;
1890 enum tree_code code;
1891 gimple *stmt;
1893 if (expr == NULL_TREE)
1894 return NULL;
1896 if (is_gimple_min_invariant (expr))
1897 return NULL;
1899 code = TREE_CODE (expr);
1900 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1902 n = TREE_OPERAND_LENGTH (expr);
1903 for (i = 0; i < n; i++)
1905 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1906 if (iv)
1907 return iv;
1911 /* Stop if it's not ssa name. */
1912 if (code != SSA_NAME)
1913 return NULL;
1915 iv = get_iv (data, expr);
1916 if (!iv || integer_zerop (iv->step))
1917 return NULL;
1918 else if (iv->biv_p)
1919 return iv;
1921 stmt = SSA_NAME_DEF_STMT (expr);
1922 if (gphi *phi = dyn_cast <gphi *> (stmt))
1924 ssa_op_iter iter;
1925 use_operand_p use_p;
1926 basic_block phi_bb = gimple_bb (phi);
1928 /* Skip loop header PHI that doesn't define biv. */
1929 if (phi_bb->loop_father == data->current_loop)
1930 return NULL;
1932 if (virtual_operand_p (gimple_phi_result (phi)))
1933 return NULL;
1935 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1937 tree use = USE_FROM_PTR (use_p);
1938 iv = find_deriving_biv_for_expr (data, use);
1939 if (iv)
1940 return iv;
1942 return NULL;
1944 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1945 return NULL;
1947 e1 = gimple_assign_rhs1 (stmt);
1948 code = gimple_assign_rhs_code (stmt);
1949 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1950 return find_deriving_biv_for_expr (data, e1);
1952 switch (code)
1954 case MULT_EXPR:
1955 case PLUS_EXPR:
1956 case MINUS_EXPR:
1957 case POINTER_PLUS_EXPR:
1958 /* Increments, decrements and multiplications by a constant
1959 are simple. */
1960 e2 = gimple_assign_rhs2 (stmt);
1961 iv = find_deriving_biv_for_expr (data, e2);
1962 if (iv)
1963 return iv;
1964 gcc_fallthrough ();
1966 CASE_CONVERT:
1967 /* Casts are simple. */
1968 return find_deriving_biv_for_expr (data, e1);
1970 default:
1971 break;
1974 return NULL;
1977 /* Record that BIV and its predecessor and successor ivs are used in
1978 address type uses. */
1980 static void
1981 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1983 unsigned i;
1984 tree type, base_1, base_2;
1985 bitmap_iterator bi;
1987 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1988 || biv->have_address_use || !biv->no_overflow)
1989 return;
1991 type = TREE_TYPE (biv->base);
1992 if (!INTEGRAL_TYPE_P (type))
1993 return;
1995 biv->have_address_use = true;
1996 data->bivs_not_used_in_addr--;
1997 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1998 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2000 struct iv *iv = ver_info (data, i)->iv;
2002 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2003 || iv->have_address_use || !iv->no_overflow)
2004 continue;
2006 if (type != TREE_TYPE (iv->base)
2007 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2008 continue;
2010 if (!operand_equal_p (biv->step, iv->step, 0))
2011 continue;
2013 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2014 if (operand_equal_p (base_1, iv->base, 0)
2015 || operand_equal_p (base_2, biv->base, 0))
2017 iv->have_address_use = true;
2018 data->bivs_not_used_in_addr--;
2023 /* Accumulates the steps of indices into DATA and replaces their values with
2024 the initial ones. Returns false when the value of the index cannot be
2025 determined. Callback for for_each_index. */
2027 struct ifs_ivopts_data
2029 struct ivopts_data *ivopts_data;
2030 gimple *stmt;
2031 tree step;
2034 static bool
2035 idx_find_step (tree base, tree *idx, void *data)
2037 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2038 struct iv *iv;
2039 bool use_overflow_semantics = false;
2040 tree step, iv_base, iv_step, lbound, off;
2041 class loop *loop = dta->ivopts_data->current_loop;
2043 /* If base is a component ref, require that the offset of the reference
2044 be invariant. */
2045 if (TREE_CODE (base) == COMPONENT_REF)
2047 off = component_ref_field_offset (base);
2048 return expr_invariant_in_loop_p (loop, off);
2051 /* If base is array, first check whether we will be able to move the
2052 reference out of the loop (in order to take its address in strength
2053 reduction). In order for this to work we need both lower bound
2054 and step to be loop invariants. */
2055 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2057 /* Moreover, for a range, the size needs to be invariant as well. */
2058 if (TREE_CODE (base) == ARRAY_RANGE_REF
2059 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2060 return false;
2062 step = array_ref_element_size (base);
2063 lbound = array_ref_low_bound (base);
2065 if (!expr_invariant_in_loop_p (loop, step)
2066 || !expr_invariant_in_loop_p (loop, lbound))
2067 return false;
2070 if (TREE_CODE (*idx) != SSA_NAME)
2071 return true;
2073 iv = get_iv (dta->ivopts_data, *idx);
2074 if (!iv)
2075 return false;
2077 /* XXX For a base of *D42 with iv->base being &x[0] we produce
2078 *&x[0], which is not folded and does not trigger the
2079 ARRAY_REF path below. */
2080 *idx = iv->base;
2082 if (integer_zerop (iv->step))
2083 return true;
2085 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2087 step = array_ref_element_size (base);
2089 /* We only handle addresses whose step is an integer constant. */
2090 if (TREE_CODE (step) != INTEGER_CST)
2091 return false;
2093 else
2094 /* The step for pointer arithmetic is already 1 byte. */
2095 step = size_one_node;
2097 iv_base = iv->base;
2098 iv_step = iv->step;
2099 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2100 use_overflow_semantics = true;
2102 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2103 sizetype, &iv_base, &iv_step, dta->stmt,
2104 use_overflow_semantics))
2106 /* The index might wrap. */
2107 return false;
2110 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2111 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2113 if (dta->ivopts_data->bivs_not_used_in_addr)
2115 if (!iv->biv_p)
2116 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2118 record_biv_for_address_use (dta->ivopts_data, iv);
2120 return true;
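/* A hypothetical example of the accumulation above: for a reference a[i]
   with 'int a[100]' and an iv i of step 1, the element size is 4 bytes, so
   4 is added to DTA->step; for a[i][j] with 'int a[100][100]' and both
   indices of step 1, the two calls add 400 and 4 for a total step of 404.  */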
2123 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2124 object is passed to it in DATA. */
2126 static bool
2127 idx_record_use (tree base, tree *idx,
2128 void *vdata)
2130 struct ivopts_data *data = (struct ivopts_data *) vdata;
2131 find_interesting_uses_op (data, *idx);
2132 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2134 if (TREE_OPERAND (base, 2))
2135 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2136 if (TREE_OPERAND (base, 3))
2137 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2139 return true;
2142 /* If we can prove that TOP = cst * BOT for some constant cst,
2143 store cst to MUL and return true. Otherwise return false.
2144 The returned value is always sign-extended, regardless of the
2145 signedness of TOP and BOT. */
2147 static bool
2148 constant_multiple_of (tree top, tree bot, widest_int *mul)
2150 aff_tree aff_top, aff_bot;
2151 tree_to_aff_combination (top, TREE_TYPE (top), &aff_top);
2152 tree_to_aff_combination (bot, TREE_TYPE (bot), &aff_bot);
2153 poly_widest_int poly_mul;
2154 if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2155 && poly_mul.is_constant (mul))
2156 return true;
2158 return false;
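/* Hypothetical operands for illustration: with TOP = 8 * n and BOT = 2 * n,
   *MUL is set to 4 and true is returned; with TOP = 8 * n + 1 and
   BOT = 2 * n there is no constant multiple, so false is returned.  */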
2161 /* Return true if memory reference REF with step STEP may be unaligned. */
2163 static bool
2164 may_be_unaligned_p (tree ref, tree step)
2166 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2167 thus they are not misaligned. */
2168 if (TREE_CODE (ref) == TARGET_MEM_REF)
2169 return false;
2171 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2172 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2173 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2175 unsigned HOST_WIDE_INT bitpos;
2176 unsigned int ref_align;
2177 get_object_alignment_1 (ref, &ref_align, &bitpos);
2178 if (ref_align < align
2179 || (bitpos % align) != 0
2180 || (bitpos % BITS_PER_UNIT) != 0)
2181 return true;
2183 unsigned int trailing_zeros = tree_ctz (step);
2184 if (trailing_zeros < HOST_BITS_PER_INT
2185 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2186 return true;
2188 return false;
2191 /* Return true if EXPR may be non-addressable. */
2193 bool
2194 may_be_nonaddressable_p (tree expr)
2196 switch (TREE_CODE (expr))
2198 case VAR_DECL:
2199 /* Check if it's a register variable. */
2200 return DECL_HARD_REGISTER (expr);
2202 case TARGET_MEM_REF:
2203 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2204 target, thus they are always addressable. */
2205 return false;
2207 case MEM_REF:
2208 /* Likewise for MEM_REFs, modulo the storage order. */
2209 return REF_REVERSE_STORAGE_ORDER (expr);
2211 case BIT_FIELD_REF:
2212 if (REF_REVERSE_STORAGE_ORDER (expr))
2213 return true;
2214 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2216 case COMPONENT_REF:
2217 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2218 return true;
2219 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2220 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2222 case ARRAY_REF:
2223 case ARRAY_RANGE_REF:
2224 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2225 return true;
2226 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2228 case VIEW_CONVERT_EXPR:
2229 /* This kind of view-conversion may wrap non-addressable objects
2230 and make them look addressable. After some processing the
2231 non-addressability may be uncovered again, causing ADDR_EXPRs
2232 of inappropriate objects to be built. */
2233 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2234 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2235 return true;
2236 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2238 CASE_CONVERT:
2239 return true;
2241 default:
2242 break;
2245 return false;
2248 /* Finds addresses in *OP_P inside STMT. */
2250 static void
2251 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2252 tree *op_p)
2254 tree base = *op_p, step = size_zero_node;
2255 struct iv *civ;
2256 struct ifs_ivopts_data ifs_ivopts_data;
2258 /* Do not play with volatile memory references. A bit too conservative,
2259 perhaps, but safe. */
2260 if (gimple_has_volatile_ops (stmt))
2261 goto fail;
2263 /* Ignore bitfields for now. Not really something terribly complicated
2264 to handle. TODO. */
2265 if (TREE_CODE (base) == BIT_FIELD_REF)
2266 goto fail;
2268 base = unshare_expr (base);
2270 if (TREE_CODE (base) == TARGET_MEM_REF)
2272 tree type = build_pointer_type (TREE_TYPE (base));
2273 tree astep;
2275 if (TMR_BASE (base)
2276 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2278 civ = get_iv (data, TMR_BASE (base));
2279 if (!civ)
2280 goto fail;
2282 TMR_BASE (base) = civ->base;
2283 step = civ->step;
2285 if (TMR_INDEX2 (base)
2286 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2288 civ = get_iv (data, TMR_INDEX2 (base));
2289 if (!civ)
2290 goto fail;
2292 TMR_INDEX2 (base) = civ->base;
2293 step = civ->step;
2295 if (TMR_INDEX (base)
2296 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2298 civ = get_iv (data, TMR_INDEX (base));
2299 if (!civ)
2300 goto fail;
2302 TMR_INDEX (base) = civ->base;
2303 astep = civ->step;
2305 if (astep)
2307 if (TMR_STEP (base))
2308 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2310 step = fold_build2 (PLUS_EXPR, type, step, astep);
2314 if (integer_zerop (step))
2315 goto fail;
2316 base = tree_mem_ref_addr (type, base);
2318 else
2320 ifs_ivopts_data.ivopts_data = data;
2321 ifs_ivopts_data.stmt = stmt;
2322 ifs_ivopts_data.step = size_zero_node;
2323 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2324 || integer_zerop (ifs_ivopts_data.step))
2325 goto fail;
2326 step = ifs_ivopts_data.step;
2328 /* Check that the base expression is addressable. This needs
2329 to be done after substituting bases of IVs into it. */
2330 if (may_be_nonaddressable_p (base))
2331 goto fail;
2333 /* Moreover, on strict alignment platforms, check that it is
2334 sufficiently aligned. */
2335 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2336 goto fail;
2338 base = build_fold_addr_expr (base);
2340 /* Substituting bases of IVs into the base expression might
2341 have caused folding opportunities. */
2342 if (TREE_CODE (base) == ADDR_EXPR)
2344 tree *ref = &TREE_OPERAND (base, 0);
2345 while (handled_component_p (*ref))
2346 ref = &TREE_OPERAND (*ref, 0);
2347 if (TREE_CODE (*ref) == MEM_REF)
2349 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2350 TREE_OPERAND (*ref, 0),
2351 TREE_OPERAND (*ref, 1));
2352 if (tem)
2353 *ref = tem;
2358 civ = alloc_iv (data, base, step);
2359 /* Fail if base object of this memory reference is unknown. */
2360 if (civ->base_object == NULL_TREE)
2361 goto fail;
2363 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2364 return;
2366 fail:
2367 for_each_index (op_p, idx_record_use, data);
2370 /* Finds and records invariants used in STMT. */
2372 static void
2373 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2375 ssa_op_iter iter;
2376 use_operand_p use_p;
2377 tree op;
2379 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2381 op = USE_FROM_PTR (use_p);
2382 record_invariant (data, op, false);
2386 /* CALL calls an internal function. If operand *OP_P will become an
2387 address when the call is expanded, return the type of the memory
2388 being addressed, otherwise return null. */
2390 static tree
2391 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2393 switch (gimple_call_internal_fn (call))
2395 case IFN_MASK_LOAD:
2396 case IFN_MASK_LOAD_LANES:
2397 case IFN_MASK_LEN_LOAD_LANES:
2398 case IFN_LEN_LOAD:
2399 case IFN_MASK_LEN_LOAD:
2400 if (op_p == gimple_call_arg_ptr (call, 0))
2401 return TREE_TYPE (gimple_call_lhs (call));
2402 return NULL_TREE;
2404 case IFN_MASK_STORE:
2405 case IFN_MASK_STORE_LANES:
2406 case IFN_MASK_LEN_STORE_LANES:
2407 case IFN_LEN_STORE:
2408 case IFN_MASK_LEN_STORE:
2410 if (op_p == gimple_call_arg_ptr (call, 0))
2412 internal_fn ifn = gimple_call_internal_fn (call);
2413 int index = internal_fn_stored_value_index (ifn);
2414 return TREE_TYPE (gimple_call_arg (call, index));
2416 return NULL_TREE;
2419 default:
2420 return NULL_TREE;
2424 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2425 Return true if the operand will become an address when STMT
2426 is expanded and record the associated address use if so. */
2428 static bool
2429 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2430 struct iv *iv)
2432 /* Fail if base object of this memory reference is unknown. */
2433 if (iv->base_object == NULL_TREE)
2434 return false;
2436 tree mem_type = NULL_TREE;
2437 if (gcall *call = dyn_cast <gcall *> (stmt))
2438 if (gimple_call_internal_p (call))
2439 mem_type = get_mem_type_for_internal_fn (call, op_p);
2440 if (mem_type)
2442 iv = alloc_iv (data, iv->base, iv->step);
2443 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2444 return true;
2446 return false;
2449 /* Finds interesting uses of induction variables in the statement STMT. */
2451 static void
2452 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2454 struct iv *iv;
2455 tree op, *lhs, *rhs;
2456 ssa_op_iter iter;
2457 use_operand_p use_p;
2458 enum tree_code code;
2460 find_invariants_stmt (data, stmt);
2462 if (gimple_code (stmt) == GIMPLE_COND)
2464 find_interesting_uses_cond (data, stmt);
2465 return;
2468 if (is_gimple_assign (stmt))
2470 lhs = gimple_assign_lhs_ptr (stmt);
2471 rhs = gimple_assign_rhs1_ptr (stmt);
2473 if (TREE_CODE (*lhs) == SSA_NAME)
2475 /* If the statement defines an induction variable, the uses are not
2476 interesting by themselves. */
2478 iv = get_iv (data, *lhs);
2480 if (iv && !integer_zerop (iv->step))
2481 return;
2484 code = gimple_assign_rhs_code (stmt);
2485 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2486 && (REFERENCE_CLASS_P (*rhs)
2487 || is_gimple_val (*rhs)))
2489 if (REFERENCE_CLASS_P (*rhs))
2490 find_interesting_uses_address (data, stmt, rhs);
2491 else
2492 find_interesting_uses_op (data, *rhs);
2494 if (REFERENCE_CLASS_P (*lhs))
2495 find_interesting_uses_address (data, stmt, lhs);
2496 return;
2498 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2500 find_interesting_uses_cond (data, stmt);
2501 return;
2504 /* TODO -- we should also handle address uses of type
2506 memory = call (whatever);
2510 call (memory). */
2513 if (gimple_code (stmt) == GIMPLE_PHI
2514 && gimple_bb (stmt) == data->current_loop->header)
2516 iv = get_iv (data, PHI_RESULT (stmt));
2518 if (iv && !integer_zerop (iv->step))
2519 return;
2522 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2524 op = USE_FROM_PTR (use_p);
2526 if (TREE_CODE (op) != SSA_NAME)
2527 continue;
2529 iv = get_iv (data, op);
2530 if (!iv)
2531 continue;
2533 if (!find_address_like_use (data, stmt, use_p->use, iv))
2534 find_interesting_uses_op (data, op);
2538 /* Finds interesting uses of induction variables outside of loops
2539 on loop exit edge EXIT. */
2541 static void
2542 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2544 gphi *phi;
2545 gphi_iterator psi;
2546 tree def;
2548 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2550 phi = psi.phi ();
2551 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2552 if (!virtual_operand_p (def))
2553 find_interesting_uses_op (data, def);
2557 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2558 mode for the memory reference represented by USE. */
2560 static GTY (()) vec<rtx, va_gc> *addr_list;
2562 static bool
2563 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2565 rtx reg, addr;
2566 unsigned list_index;
2567 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2568 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2570 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2571 if (list_index >= vec_safe_length (addr_list))
2572 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2574 addr = (*addr_list)[list_index];
2575 if (!addr)
2577 addr_mode = targetm.addr_space.address_mode (as);
2578 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2579 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2580 (*addr_list)[list_index] = addr;
2582 else
2583 addr_mode = GET_MODE (addr);
2585 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2586 return (memory_address_addr_space_p (mem_mode, addr, as));
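/* Illustration with made-up, target-dependent numbers: on a target whose
   [base + offset] mode accepts signed 16-bit displacements, an OFFSET of
   32760 makes the probe address (plus reg 32760) valid and true is
   returned, while an OFFSET of 40000 does not.  */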
2589 /* Comparison function to sort group in ascending order of addr_offset. */
2591 static int
2592 group_compare_offset (const void *a, const void *b)
2594 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2595 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2597 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2600 /* Check if small groups should be split. Return true if no group
2601 contains more than two uses with distinct addr_offsets. Return
2602 false otherwise. We want to split such groups because:
2604 1) Small groups don't provide much benefit and may interfere with
2605 general candidate selection.
2606 2) A problem consisting only of small groups is usually small, and the
2607 general algorithm can handle it well.
2609 TODO -- The above claim may not hold when we want to merge memory
2610 accesses with consecutive addresses.  */
2612 static bool
2613 split_small_address_groups_p (struct ivopts_data *data)
2615 unsigned int i, j, distinct = 1;
2616 struct iv_use *pre;
2617 struct iv_group *group;
2619 for (i = 0; i < data->vgroups.length (); i++)
2621 group = data->vgroups[i];
2622 if (group->vuses.length () == 1)
2623 continue;
2625 gcc_assert (address_p (group->type));
2626 if (group->vuses.length () == 2)
2628 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2629 group->vuses[1]->addr_offset) > 0)
2630 std::swap (group->vuses[0], group->vuses[1]);
2632 else
2633 group->vuses.qsort (group_compare_offset);
2635 if (distinct > 2)
2636 continue;
2638 distinct = 1;
2639 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2641 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2643 pre = group->vuses[j];
2644 distinct++;
2647 if (distinct > 2)
2648 break;
2652 return (distinct <= 2);
2655 /* For each group of address type uses, this function further groups
2656 these uses according to the maximum offset supported by target's
2657 [base + offset] addressing mode. */
2659 static void
2660 split_address_groups (struct ivopts_data *data)
2662 unsigned int i, j;
2663 /* Whether every group should be split unconditionally. */
2664 bool split_p = split_small_address_groups_p (data);
2666 for (i = 0; i < data->vgroups.length (); i++)
2668 struct iv_group *new_group = NULL;
2669 struct iv_group *group = data->vgroups[i];
2670 struct iv_use *use = group->vuses[0];
2672 use->id = 0;
2673 use->group_id = group->id;
2674 if (group->vuses.length () == 1)
2675 continue;
2677 gcc_assert (address_p (use->type));
2679 for (j = 1; j < group->vuses.length ();)
2681 struct iv_use *next = group->vuses[j];
2682 poly_int64 offset = next->addr_offset - use->addr_offset;
2684 /* Split the group if asked to, or if the offset against the first
2685 use can't fit in the offset part of the addressing mode. IV uses
2686 having the same offset are still kept in one group. */
2687 if (maybe_ne (offset, 0)
2688 && (split_p || !addr_offset_valid_p (use, offset)))
2690 if (!new_group)
2691 new_group = record_group (data, group->type);
2692 group->vuses.ordered_remove (j);
2693 new_group->vuses.safe_push (next);
2694 continue;
2697 next->id = j;
2698 next->group_id = group->id;
2699 j++;
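/* A hypothetical example of the splitting above: a group whose uses sit at
   offsets {0, 8, 16, 70000} from the first use stays together for the first
   three uses if the target accepts those displacements, while the use at
   offset 70000 is moved into a newly recorded group.  */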
2704 /* Finds uses of the induction variables that are interesting. */
2706 static void
2707 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2709 basic_block bb;
2710 gimple_stmt_iterator bsi;
2711 unsigned i;
2712 edge e;
2714 for (i = 0; i < data->current_loop->num_nodes; i++)
2716 edge_iterator ei;
2717 bb = body[i];
2719 FOR_EACH_EDGE (e, ei, bb->succs)
2720 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2721 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2722 find_interesting_uses_outside (data, e);
2724 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2725 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2726 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2727 if (!is_gimple_debug (gsi_stmt (bsi)))
2728 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2731 split_address_groups (data);
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2735 fprintf (dump_file, "\n<IV Groups>:\n");
2736 dump_groups (dump_file, data);
2737 fprintf (dump_file, "\n");
2741 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2742 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2743 we are at the top-level of the processed address. */
2745 static tree
2746 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2747 poly_int64 *offset)
2749 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2750 enum tree_code code;
2751 tree type, orig_type = TREE_TYPE (expr);
2752 poly_int64 off0, off1;
2753 HOST_WIDE_INT st;
2754 tree orig_expr = expr;
2756 STRIP_NOPS (expr);
2758 type = TREE_TYPE (expr);
2759 code = TREE_CODE (expr);
2760 *offset = 0;
2762 switch (code)
2764 case POINTER_PLUS_EXPR:
2765 case PLUS_EXPR:
2766 case MINUS_EXPR:
2767 op0 = TREE_OPERAND (expr, 0);
2768 op1 = TREE_OPERAND (expr, 1);
2770 op0 = strip_offset_1 (op0, false, false, &off0);
2771 op1 = strip_offset_1 (op1, false, false, &off1);
2773 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2774 if (op0 == TREE_OPERAND (expr, 0)
2775 && op1 == TREE_OPERAND (expr, 1))
2776 return orig_expr;
2778 if (integer_zerop (op1))
2779 expr = op0;
2780 else if (integer_zerop (op0))
2782 if (code == MINUS_EXPR)
2784 if (TYPE_OVERFLOW_UNDEFINED (type))
2786 type = unsigned_type_for (type);
2787 op1 = fold_convert (type, op1);
2789 expr = fold_build1 (NEGATE_EXPR, type, op1);
2791 else
2792 expr = op1;
2794 else
2796 if (TYPE_OVERFLOW_UNDEFINED (type))
2798 type = unsigned_type_for (type);
2799 if (code == POINTER_PLUS_EXPR)
2800 code = PLUS_EXPR;
2801 op0 = fold_convert (type, op0);
2802 op1 = fold_convert (type, op1);
2804 expr = fold_build2 (code, type, op0, op1);
2807 return fold_convert (orig_type, expr);
2809 case MULT_EXPR:
2810 op1 = TREE_OPERAND (expr, 1);
2811 if (!cst_and_fits_in_hwi (op1))
2812 return orig_expr;
2814 op0 = TREE_OPERAND (expr, 0);
2815 op0 = strip_offset_1 (op0, false, false, &off0);
2816 if (op0 == TREE_OPERAND (expr, 0))
2817 return orig_expr;
2819 *offset = off0 * int_cst_value (op1);
2820 if (integer_zerop (op0))
2821 expr = op0;
2822 else
2824 if (TYPE_OVERFLOW_UNDEFINED (type))
2826 type = unsigned_type_for (type);
2827 op0 = fold_convert (type, op0);
2828 op1 = fold_convert (type, op1);
2830 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2833 return fold_convert (orig_type, expr);
2835 case ARRAY_REF:
2836 case ARRAY_RANGE_REF:
2837 if (!inside_addr)
2838 return orig_expr;
2840 step = array_ref_element_size (expr);
2841 if (!cst_and_fits_in_hwi (step))
2842 break;
2844 st = int_cst_value (step);
2845 op1 = TREE_OPERAND (expr, 1);
2846 op1 = strip_offset_1 (op1, false, false, &off1);
2847 *offset = off1 * st;
2849 if (top_compref
2850 && integer_zerop (op1))
2852 /* Strip the component reference completely. */
2853 op0 = TREE_OPERAND (expr, 0);
2854 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2855 *offset += off0;
2856 return op0;
2858 break;
2860 case COMPONENT_REF:
2862 tree field;
2864 if (!inside_addr)
2865 return orig_expr;
2867 tmp = component_ref_field_offset (expr);
2868 field = TREE_OPERAND (expr, 1);
2869 if (top_compref
2870 && cst_and_fits_in_hwi (tmp)
2871 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2873 HOST_WIDE_INT boffset, abs_off;
2875 /* Strip the component reference completely. */
2876 op0 = TREE_OPERAND (expr, 0);
2877 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2878 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2879 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2880 if (boffset < 0)
2881 abs_off = -abs_off;
2883 *offset = off0 + int_cst_value (tmp) + abs_off;
2884 return op0;
2887 break;
2889 case ADDR_EXPR:
2890 op0 = TREE_OPERAND (expr, 0);
2891 op0 = strip_offset_1 (op0, true, true, &off0);
2892 *offset += off0;
2894 if (op0 == TREE_OPERAND (expr, 0))
2895 return orig_expr;
2897 expr = build_fold_addr_expr (op0);
2898 return fold_convert (orig_type, expr);
2900 case MEM_REF:
2901 /* ??? Offset operand? */
2902 inside_addr = false;
2903 break;
2905 default:
2906 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2907 return build_int_cst (orig_type, 0);
2908 return orig_expr;
2911 /* Default handling of expressions for which we want to recurse into
2912 the first operand. */
2913 op0 = TREE_OPERAND (expr, 0);
2914 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2915 *offset += off0;
2917 if (op0 == TREE_OPERAND (expr, 0)
2918 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2919 return orig_expr;
2921 expr = copy_node (expr);
2922 TREE_OPERAND (expr, 0) = op0;
2923 if (op1)
2924 TREE_OPERAND (expr, 1) = op1;
2926 /* Inside address, we might strip the top level component references,
2927 thus changing type of the expression. Handling of ADDR_EXPR
2928 will fix that. */
2929 expr = fold_convert (orig_type, expr);
2931 return expr;
2934 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2936 static tree
2937 strip_offset (tree expr, poly_uint64 *offset)
2939 poly_int64 off;
2940 tree core = strip_offset_1 (expr, false, false, &off);
2941 *offset = off;
2942 return core;
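/* Hypothetical inputs for illustration: for EXPR = &a[i + 3] with 4-byte
   elements, the core &a[i] is returned and *OFFSET is set to 12; for
   EXPR = p + 16 with a pointer p, p is returned and *OFFSET is set to 16.  */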
2945 /* Returns variant of TYPE that can be used as base for different uses.
2946 We return unsigned type with the same precision, which avoids problems
2947 with overflows. */
2949 static tree
2950 generic_type_for (tree type)
2952 if (POINTER_TYPE_P (type))
2953 return unsigned_type_for (type);
2955 if (TYPE_UNSIGNED (type))
2956 return type;
2958 return unsigned_type_for (type);
2961 /* Private data for walk_tree. */
2963 struct walk_tree_data
2965 bitmap *inv_vars;
2966 struct ivopts_data *idata;
2969 /* Callback function for walk_tree; it records invariants and symbol
2970 references in *EXPR_P. DATA is the structure storing result info. */
2972 static tree
2973 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2975 tree op = *expr_p;
2976 struct version_info *info;
2977 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2979 if (TREE_CODE (op) != SSA_NAME)
2980 return NULL_TREE;
2982 info = name_info (wdata->idata, op);
2983 /* Because we expand simple operations when finding IVs, a loop invariant
2984 variable that isn't referred to by the original loop could be used now.
2985 Record such invariant variables here. */
2986 if (!info->iv)
2988 struct ivopts_data *idata = wdata->idata;
2989 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2991 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2993 tree steptype = TREE_TYPE (op);
2994 if (POINTER_TYPE_P (steptype))
2995 steptype = sizetype;
2996 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
2997 record_invariant (idata, op, false);
3000 if (!info->inv_id || info->has_nonlin_use)
3001 return NULL_TREE;
3003 if (!*wdata->inv_vars)
3004 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3005 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3007 return NULL_TREE;
3010 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3011 store them. */
3013 static inline void
3014 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3016 struct walk_tree_data wdata;
3018 if (!inv_vars)
3019 return;
3021 wdata.idata = data;
3022 wdata.inv_vars = inv_vars;
3023 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3026 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
3027 entry is recorded if it doesn't exist yet. Given the two exprs below:
3028 inv_expr + cst1, inv_expr + cst2
3029 it's hard to decide whether the constant part should be stripped
3030 or not. We choose not to strip it, based on the following facts:
3031 1) We need to count the ADD cost for the constant part if it's stripped,
3032 which isn't always trivial where this function is called.
3033 2) Stripping the constant away may conflict with the subsequent loop
3034 invariant hoisting pass.
3035 3) Not stripping the constant away results in more invariant exprs,
3036 which usually leads to decisions preferring lower register pressure. */
3038 static iv_inv_expr_ent *
3039 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3041 STRIP_NOPS (inv_expr);
3043 if (poly_int_tree_p (inv_expr)
3044 || TREE_CODE (inv_expr) == SSA_NAME)
3045 return NULL;
3047 /* Don't strip constant part away as we used to. */
3049 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3050 struct iv_inv_expr_ent ent;
3051 ent.expr = inv_expr;
3052 ent.hash = iterative_hash_expr (inv_expr, 0);
3053 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3055 if (!*slot)
3057 *slot = XNEW (struct iv_inv_expr_ent);
3058 (*slot)->expr = inv_expr;
3059 (*slot)->hash = ent.hash;
3060 (*slot)->id = ++data->max_inv_expr_id;
3063 return *slot;
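/* For illustration with made-up expressions: because the constant part is
   kept, inv + 4 and inv + 8 hash to two distinct iv_inv_expr_ent entries
   instead of sharing a single entry for inv.  */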
3067 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3068 unsuitable as ivopts candidates for potentially involving undefined
3069 behavior. */
3071 static tree
3072 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3074 basic_block bb = (basic_block) bb_;
3075 if (TREE_CODE (*tp) == SSA_NAME
3076 && ssa_name_maybe_undef_p (*tp)
3077 && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3078 return *tp;
3079 if (!EXPR_P (*tp))
3080 *walk_subtrees = 0;
3081 return NULL;
3084 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3085 position to POS. If USE is not NULL, the candidate is set as related to
3086 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3087 replacement of the final value of the iv by a direct computation. */
3089 static struct iv_cand *
3090 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3091 enum iv_position pos, struct iv_use *use,
3092 gimple *incremented_at, struct iv *orig_iv = NULL,
3093 bool doloop = false)
3095 unsigned i;
3096 struct iv_cand *cand = NULL;
3097 tree type, orig_type;
3099 gcc_assert (base && step);
3101 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3102 live, but the ivopts code may replace a real pointer with one
3103 pointing before or after the memory block that is then adjusted
3104 into the memory block during the loop. FIXME: It would likely be
3105 better to actually force the pointer live and still use ivopts;
3106 for example, it would be enough to write the pointer into memory
3107 and keep it there until after the loop. */
3108 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3109 return NULL;
3111 /* If BASE contains undefined SSA names make sure we only record
3112 the original IV. */
3113 bool involves_undefs = false;
3114 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3116 if (pos != IP_ORIGINAL)
3117 return NULL;
3118 important = false;
3119 involves_undefs = true;
3122 /* For non-original variables, make sure their values are computed in a type
3123 that does not invoke undefined behavior on overflows (since in general,
3124 we cannot prove that these induction variables are non-wrapping). */
3125 if (pos != IP_ORIGINAL)
3127 orig_type = TREE_TYPE (base);
3128 type = generic_type_for (orig_type);
3129 if (type != orig_type)
3131 base = fold_convert (type, base);
3132 step = fold_convert (type, step);
3136 for (i = 0; i < data->vcands.length (); i++)
3138 cand = data->vcands[i];
3140 if (cand->pos != pos)
3141 continue;
3143 if (cand->incremented_at != incremented_at
3144 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3145 && cand->ainc_use != use))
3146 continue;
3148 if (operand_equal_p (base, cand->iv->base, 0)
3149 && operand_equal_p (step, cand->iv->step, 0)
3150 && (TYPE_PRECISION (TREE_TYPE (base))
3151 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3152 break;
3155 if (i == data->vcands.length ())
3157 cand = XCNEW (struct iv_cand);
3158 cand->id = i;
3159 cand->iv = alloc_iv (data, base, step);
3160 cand->pos = pos;
3161 if (pos != IP_ORIGINAL)
3163 if (doloop)
3164 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3165 else
3166 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3167 cand->var_after = cand->var_before;
3169 cand->important = important;
3170 cand->involves_undefs = involves_undefs;
3171 cand->incremented_at = incremented_at;
3172 cand->doloop_p = doloop;
3173 data->vcands.safe_push (cand);
3175 if (!poly_int_tree_p (step))
3177 find_inv_vars (data, &step, &cand->inv_vars);
3179 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3180 /* Share bitmap between inv_vars and inv_exprs for cand. */
3181 if (inv_expr != NULL)
3183 cand->inv_exprs = cand->inv_vars;
3184 cand->inv_vars = NULL;
3185 if (cand->inv_exprs)
3186 bitmap_clear (cand->inv_exprs);
3187 else
3188 cand->inv_exprs = BITMAP_ALLOC (NULL);
3190 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3194 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3195 cand->ainc_use = use;
3196 else
3197 cand->ainc_use = NULL;
3199 cand->orig_iv = orig_iv;
3200 if (dump_file && (dump_flags & TDF_DETAILS))
3201 dump_cand (dump_file, cand);
3204 cand->important |= important;
3205 cand->doloop_p |= doloop;
3207 /* Relate candidate to the group for which it is added. */
3208 if (use)
3209 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3211 return cand;
3214 /* Returns true if incrementing the induction variable at the end of the LOOP
3215 is allowed.
3217 The purpose is to avoid splitting the latch edge with a biv increment, thus
3218 creating a jump, possibly confusing other optimization passes and leaving
3219 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3220 available (so we do not have a better alternative), or if the latch edge
3221 is already nonempty. */
3223 static bool
3224 allow_ip_end_pos_p (class loop *loop)
3226 if (!ip_normal_pos (loop))
3227 return true;
3229 if (!empty_block_p (ip_end_pos (loop)))
3230 return true;
3232 return false;
3235 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3236 Important field is set to IMPORTANT. */
3238 static void
3239 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3240 bool important, struct iv_use *use)
3242 basic_block use_bb = gimple_bb (use->stmt);
3243 machine_mode mem_mode;
3244 unsigned HOST_WIDE_INT cstepi;
3246 /* If we insert the increment in any position other than the standard
3247 ones, we must ensure that it is incremented once per iteration.
3248 It must not be in an inner nested loop, or one side of an if
3249 statement. */
3250 if (use_bb->loop_father != data->current_loop
3251 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3252 || stmt_can_throw_internal (cfun, use->stmt)
3253 || !cst_and_fits_in_hwi (step))
3254 return;
3256 cstepi = int_cst_value (step);
3258 mem_mode = TYPE_MODE (use->mem_type);
3259 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3260 || USE_STORE_PRE_INCREMENT (mem_mode))
3261 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3262 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3263 || USE_STORE_PRE_DECREMENT (mem_mode))
3264 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3266 enum tree_code code = MINUS_EXPR;
3267 tree new_base;
3268 tree new_step = step;
3270 if (POINTER_TYPE_P (TREE_TYPE (base)))
3272 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3273 code = POINTER_PLUS_EXPR;
3275 else
3276 new_step = fold_convert (TREE_TYPE (base), new_step);
3277 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3278 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3279 use->stmt);
3281 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3282 || USE_STORE_POST_INCREMENT (mem_mode))
3283 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3284 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3285 || USE_STORE_POST_DECREMENT (mem_mode))
3286 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3288 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3289 use->stmt);
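/* A hypothetical, target-dependent illustration: for a 4-byte access *p in
   the loop body where p steps by 4, a target with post-increment addressing
   gets an IP_AFTER_USE candidate here, allowing the access to be expanded
   as a load or store with post-increment of p.  */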
3293 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3294 position to POS. If USE is not NULL, the candidate is set as related to
3295 it. The candidate computation is scheduled before exit condition and at
3296 the end of loop. */
3298 static void
3299 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3300 struct iv_use *use, struct iv *orig_iv = NULL,
3301 bool doloop = false)
3303 if (ip_normal_pos (data->current_loop))
3304 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3305 doloop);
3306 /* Exclude the doloop candidate here since it requires a decrement followed by
3307 a comparison and jump; the IP_END position doesn't match. */
3308 if (!doloop && ip_end_pos (data->current_loop)
3309 && allow_ip_end_pos_p (data->current_loop))
3310 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3313 /* Adds standard iv candidates. */
3315 static void
3316 add_standard_iv_candidates (struct ivopts_data *data)
3318 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3320 /* The same for a double-integer type if it is still fast enough. */
3321 if (TYPE_PRECISION
3322 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3323 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3324 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3325 build_int_cst (long_integer_type_node, 1), true, NULL);
3327 /* The same for a double-integer type if it is still fast enough. */
3328 if (TYPE_PRECISION
3329 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3330 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3331 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3332 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3336 /* Adds candidates based on the old induction variable IV. */
3338 static void
3339 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3341 gimple *phi;
3342 tree def;
3343 struct iv_cand *cand;
3345 /* Check if this biv is used in address type use. */
3346 if (iv->no_overflow && iv->have_address_use
3347 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3348 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3350 tree base = fold_convert (sizetype, iv->base);
3351 tree step = fold_convert (sizetype, iv->step);
3353 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3354 add_candidate (data, base, step, true, NULL, iv);
3355 /* Add iv cand of the original type only if it has nonlinear use. */
3356 if (iv->nonlin_use)
3357 add_candidate (data, iv->base, iv->step, true, NULL);
3359 else
3360 add_candidate (data, iv->base, iv->step, true, NULL);
3362 /* The same, but with initial value zero. */
3363 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3364 add_candidate (data, size_int (0), iv->step, true, NULL);
3365 else
3366 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3367 iv->step, true, NULL);
3369 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3370 if (gimple_code (phi) == GIMPLE_PHI)
3372 /* Additionally record the possibility of leaving the original iv
3373 untouched. */
3374 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3375 /* Don't add candidate if it's from another PHI node because
3376 it's an affine iv appearing in the form of PEELED_CHREC. */
3377 phi = SSA_NAME_DEF_STMT (def);
3378 if (gimple_code (phi) != GIMPLE_PHI)
3380 cand = add_candidate_1 (data,
3381 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3382 SSA_NAME_DEF_STMT (def));
3383 if (cand)
3385 cand->var_before = iv->ssa_name;
3386 cand->var_after = def;
3389 else
3390 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3394 /* Adds candidates based on the old induction variables. */
3396 static void
3397 add_iv_candidate_for_bivs (struct ivopts_data *data)
3399 unsigned i;
3400 struct iv *iv;
3401 bitmap_iterator bi;
3403 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3405 iv = ver_info (data, i)->iv;
3406 if (iv && iv->biv_p && !integer_zerop (iv->step))
3407 add_iv_candidate_for_biv (data, iv);
3411 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3413 static void
3414 record_common_cand (struct ivopts_data *data, tree base,
3415 tree step, struct iv_use *use)
3417 class iv_common_cand ent;
3418 class iv_common_cand **slot;
3420 ent.base = base;
3421 ent.step = step;
3422 ent.hash = iterative_hash_expr (base, 0);
3423 ent.hash = iterative_hash_expr (step, ent.hash);
3425 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3426 if (*slot == NULL)
3428 *slot = new iv_common_cand ();
3429 (*slot)->base = base;
3430 (*slot)->step = step;
3431 (*slot)->uses.create (8);
3432 (*slot)->hash = ent.hash;
3433 data->iv_common_cands.safe_push ((*slot));
3436 gcc_assert (use != NULL);
3437 (*slot)->uses.safe_push (use);
3438 return;
3441 /* Comparison function used to sort common candidates. */
3443 static int
3444 common_cand_cmp (const void *p1, const void *p2)
3446 unsigned n1, n2;
3447 const class iv_common_cand *const *const ccand1
3448 = (const class iv_common_cand *const *)p1;
3449 const class iv_common_cand *const *const ccand2
3450 = (const class iv_common_cand *const *)p2;
3452 n1 = (*ccand1)->uses.length ();
3453 n2 = (*ccand2)->uses.length ();
3454 return n2 - n1;
3457 /* Adds IV candidates based on the common candidates recorded. */
3459 static void
3460 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3462 unsigned i, j;
3463 struct iv_cand *cand_1, *cand_2;
3465 data->iv_common_cands.qsort (common_cand_cmp);
3466 for (i = 0; i < data->iv_common_cands.length (); i++)
3468 class iv_common_cand *ptr = data->iv_common_cands[i];
3470 /* Only add IV candidate if it's derived from multiple uses. */
3471 if (ptr->uses.length () <= 1)
3472 break;
3474 cand_1 = NULL;
3475 cand_2 = NULL;
3476 if (ip_normal_pos (data->current_loop))
3477 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3478 false, IP_NORMAL, NULL, NULL);
3480 if (ip_end_pos (data->current_loop)
3481 && allow_ip_end_pos_p (data->current_loop))
3482 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3483 false, IP_END, NULL, NULL);
3485 /* Bind deriving uses and the new candidates. */
3486 for (j = 0; j < ptr->uses.length (); j++)
3488 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3489 if (cand_1)
3490 bitmap_set_bit (group->related_cands, cand_1->id);
3491 if (cand_2)
3492 bitmap_set_bit (group->related_cands, cand_2->id);
3496 /* Release data since it is useless from this point. */
3497 data->iv_common_cand_tab->empty ();
3498 data->iv_common_cands.truncate (0);
3501 /* Adds candidates based on the value of USE's iv. */
3503 static void
3504 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3506 poly_uint64 offset;
3507 tree base;
3508 struct iv *iv = use->iv;
3509 tree basetype = TREE_TYPE (iv->base);
3511 /* Don't add a candidate for an iv_use with a non-integer, non-pointer, or
3512 non-mode-precision type; instead, add a candidate for the corresponding
3513 scev in an unsigned type with the same precision. See PR93674 for more info. */
3514 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3515 || !type_has_mode_precision_p (basetype))
3517 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3518 TYPE_UNSIGNED (basetype));
3519 add_candidate (data, fold_convert (basetype, iv->base),
3520 fold_convert (basetype, iv->step), false, NULL);
3521 return;
3524 add_candidate (data, iv->base, iv->step, false, use);
3526 /* Record common candidate for use in case it can be shared by others. */
3527 record_common_cand (data, iv->base, iv->step, use);
3529 /* Record common candidate with initial value zero. */
3530 basetype = TREE_TYPE (iv->base);
3531 if (POINTER_TYPE_P (basetype))
3532 basetype = sizetype;
3533 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3535 /* Compare the cost of an address with an unscaled index with the cost of
3536 an address with a scaled index and add candidate if useful. */
3537 poly_int64 step;
3538 if (use != NULL
3539 && poly_int_tree_p (iv->step, &step)
3540 && address_p (use->type))
3542 poly_int64 new_step;
3543 unsigned int fact = preferred_mem_scale_factor
3544 (use->iv->base,
3545 TYPE_MODE (use->mem_type),
3546 optimize_loop_for_speed_p (data->current_loop));
3548 if (fact != 1
3549 && multiple_p (step, fact, &new_step))
3550 add_candidate (data, size_int (0),
3551 wide_int_to_tree (sizetype, new_step),
3552 true, NULL);
3555 /* Record common candidate with constant offset stripped in base.
3556 Like the use itself, we also add candidate directly for it. */
3557 base = strip_offset (iv->base, &offset);
3558 if (maybe_ne (offset, 0U) || base != iv->base)
3560 record_common_cand (data, base, iv->step, use);
3561 add_candidate (data, base, iv->step, false, use);
3564 /* Record common candidate with base_object removed in base. */
3565 base = iv->base;
3566 STRIP_NOPS (base);
3567 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3569 tree step = iv->step;
3571 STRIP_NOPS (step);
3572 base = TREE_OPERAND (base, 1);
3573 step = fold_convert (sizetype, step);
3574 record_common_cand (data, base, step, use);
3575 /* Also record common candidate with offset stripped. */
3576 tree alt_base, alt_offset;
3577 split_constant_offset (base, &alt_base, &alt_offset);
3578 if (!integer_zerop (alt_offset))
3579 record_common_cand (data, alt_base, step, use);
3582 /* Finally, add auto-increment candidates. Make such variables
3583 important since other iv uses with the same base object may be based
3584 on them. */
3585 if (use != NULL && address_p (use->type))
3586 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3589 /* Adds candidates based on the uses. */
3591 static void
3592 add_iv_candidate_for_groups (struct ivopts_data *data)
3594 unsigned i;
3596 /* Only add candidate for the first use in group. */
3597 for (i = 0; i < data->vgroups.length (); i++)
3599 struct iv_group *group = data->vgroups[i];
3601 gcc_assert (group->vuses[0] != NULL);
3602 add_iv_candidate_for_use (data, group->vuses[0]);
3604 add_iv_candidate_derived_from_uses (data);
3607 /* Record important candidates and add them to related_cands bitmaps. */
3609 static void
3610 record_important_candidates (struct ivopts_data *data)
3612 unsigned i;
3613 struct iv_group *group;
3615 for (i = 0; i < data->vcands.length (); i++)
3617 struct iv_cand *cand = data->vcands[i];
3619 if (cand->important)
3620 bitmap_set_bit (data->important_candidates, i);
3623 data->consider_all_candidates = (data->vcands.length ()
3624 <= CONSIDER_ALL_CANDIDATES_BOUND);
3626 /* Add important candidates to groups' related_cands bitmaps. */
3627 for (i = 0; i < data->vgroups.length (); i++)
3629 group = data->vgroups[i];
3630 bitmap_ior_into (group->related_cands, data->important_candidates);
3634 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3635 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3636 we allocate a simple list to every use. */
3638 static void
3639 alloc_use_cost_map (struct ivopts_data *data)
3641 unsigned i, size, s;
3643 for (i = 0; i < data->vgroups.length (); i++)
3645 struct iv_group *group = data->vgroups[i];
3647 if (data->consider_all_candidates)
3648 size = data->vcands.length ();
3649 else
3651 s = bitmap_count_bits (group->related_cands);
3653 /* Round up to a power of two, so that computing the modulo is fast. */
3654 size = s ? (1 << ceil_log2 (s)) : 1;
3657 group->n_map_members = size;
3658 group->cost_map = XCNEWVEC (class cost_pair, size);
3662 /* Sets cost of (GROUP, CAND) pair to COST and records that it depends
3663 on invariants INV_VARS and that the value used in expressing it is
3664 VALUE, and in case of iv elimination the comparison operator is COMP. */
3666 static void
3667 set_group_iv_cost (struct ivopts_data *data,
3668 struct iv_group *group, struct iv_cand *cand,
3669 comp_cost cost, bitmap inv_vars, tree value,
3670 enum tree_code comp, bitmap inv_exprs)
3672 unsigned i, s;
3674 if (cost.infinite_cost_p ())
3676 BITMAP_FREE (inv_vars);
3677 BITMAP_FREE (inv_exprs);
3678 return;
3681 if (data->consider_all_candidates)
3683 group->cost_map[cand->id].cand = cand;
3684 group->cost_map[cand->id].cost = cost;
3685 group->cost_map[cand->id].inv_vars = inv_vars;
3686 group->cost_map[cand->id].inv_exprs = inv_exprs;
3687 group->cost_map[cand->id].value = value;
3688 group->cost_map[cand->id].comp = comp;
3689 return;
3692 /* n_map_members is a power of two, so this computes modulo. */
3693 s = cand->id & (group->n_map_members - 1);
3694 for (i = s; i < group->n_map_members; i++)
3695 if (!group->cost_map[i].cand)
3696 goto found;
3697 for (i = 0; i < s; i++)
3698 if (!group->cost_map[i].cand)
3699 goto found;
3701 gcc_unreachable ();
3703 found:
3704 group->cost_map[i].cand = cand;
3705 group->cost_map[i].cost = cost;
3706 group->cost_map[i].inv_vars = inv_vars;
3707 group->cost_map[i].inv_exprs = inv_exprs;
3708 group->cost_map[i].value = value;
3709 group->cost_map[i].comp = comp;
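/* Illustration of the probing scheme: with n_map_members == 8, a candidate
   with id 11 starts at slot 11 & 7 == 3 and scans forward (wrapping to
   slot 0) until a free slot is found; get_group_iv_cost below probes in the
   same order, so a lookup stops either at the candidate or at the first
   empty slot.  */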
3712 /* Gets cost of (GROUP, CAND) pair. */
3714 static class cost_pair *
3715 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3716 struct iv_cand *cand)
3718 unsigned i, s;
3719 class cost_pair *ret;
3721 if (!cand)
3722 return NULL;
3724 if (data->consider_all_candidates)
3726 ret = group->cost_map + cand->id;
3727 if (!ret->cand)
3728 return NULL;
3730 return ret;
3733 /* n_map_members is a power of two, so this computes modulo. */
3734 s = cand->id & (group->n_map_members - 1);
3735 for (i = s; i < group->n_map_members; i++)
3736 if (group->cost_map[i].cand == cand)
3737 return group->cost_map + i;
3738 else if (group->cost_map[i].cand == NULL)
3739 return NULL;
3740 for (i = 0; i < s; i++)
3741 if (group->cost_map[i].cand == cand)
3742 return group->cost_map + i;
3743 else if (group->cost_map[i].cand == NULL)
3744 return NULL;
3746 return NULL;
3749 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3750 static rtx
3751 produce_memory_decl_rtl (tree obj, int *regno)
3753 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3754 machine_mode address_mode = targetm.addr_space.address_mode (as);
3755 rtx x;
3757 gcc_assert (obj);
3758 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3760 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3761 x = gen_rtx_SYMBOL_REF (address_mode, name);
3762 SET_SYMBOL_REF_DECL (x, obj);
3763 x = gen_rtx_MEM (DECL_MODE (obj), x);
3764 set_mem_addr_space (x, as);
3765 targetm.encode_section_info (obj, x, true);
3767 else
3769 x = gen_raw_REG (address_mode, (*regno)++);
3770 x = gen_rtx_MEM (DECL_MODE (obj), x);
3771 set_mem_addr_space (x, as);
3774 return x;
3777 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3778 walk_tree. DATA contains the actual fake register number. */
3780 static tree
3781 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3783 tree obj = NULL_TREE;
3784 rtx x = NULL_RTX;
3785 int *regno = (int *) data;
3787 switch (TREE_CODE (*expr_p))
3789 case ADDR_EXPR:
3790 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3791 handled_component_p (*expr_p);
3792 expr_p = &TREE_OPERAND (*expr_p, 0))
3793 continue;
3794 obj = *expr_p;
3795 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3796 x = produce_memory_decl_rtl (obj, regno);
3797 break;
3799 case SSA_NAME:
3800 *ws = 0;
3801 obj = SSA_NAME_VAR (*expr_p);
3802 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3803 if (!obj)
3804 return NULL_TREE;
3805 if (!DECL_RTL_SET_P (obj))
3806 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3807 break;
3809 case VAR_DECL:
3810 case PARM_DECL:
3811 case RESULT_DECL:
3812 *ws = 0;
3813 obj = *expr_p;
3815 if (DECL_RTL_SET_P (obj))
3816 break;
3818 if (DECL_MODE (obj) == BLKmode)
3819 x = produce_memory_decl_rtl (obj, regno);
3820 else
3821 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3823 break;
3825 default:
3826 break;
3829 if (x)
3831 decl_rtl_to_reset.safe_push (obj);
3832 SET_DECL_RTL (obj, x);
3835 return NULL_TREE;
3838 /* Predict whether the given loop will be transformed in the RTL
3839 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3840 This covers only the target-independent checks; see targetm.predict_doloop_p
3841 for the target-dependent ones.
3843 Note that according to some initial investigation, checks like the costly
3844 niter check and invalid stmt scanning don't gain much in general
3845 cases, so keep this as simple as possible for now.
3847 Some RTL-specific checks seem impossible to perform on gimple; if any new
3848 checks or easy checks _are_ missing here, please add them. */
3850 static bool
3851 generic_predict_doloop_p (struct ivopts_data *data)
3853 class loop *loop = data->current_loop;
3855 /* Call target hook for target dependent checks. */
3856 if (!targetm.predict_doloop_p (loop))
3858 if (dump_file && (dump_flags & TDF_DETAILS))
3859 fprintf (dump_file, "Predict doloop failure due to"
3860 " target specific checks.\n");
3861 return false;
3864 /* Similar to doloop_optimize, check the iteration description to see whether
3865 it's suitable or not. Keep it as simple as possible; feel free to extend it
3866 if you find that any multiple-exit cases matter. */
3867 edge exit = single_dom_exit (loop);
3868 class tree_niter_desc *niter_desc;
3869 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3871 if (dump_file && (dump_flags & TDF_DETAILS))
3872 fprintf (dump_file, "Predict doloop failure due to"
3873 " unexpected niters.\n");
3874 return false;
3877 /* Similar to doloop_optimize, check whether the iteration count is too
3878 small to be profitable. */
3879 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3880 if (est_niter == -1)
3881 est_niter = get_likely_max_loop_iterations_int (loop);
3882 if (est_niter >= 0 && est_niter < 3)
3884 if (dump_file && (dump_flags & TDF_DETAILS))
3885 fprintf (dump_file,
3886 "Predict doloop failure due to"
3887 " too few iterations (%u).\n",
3888 (unsigned int) est_niter);
3889 return false;
3892 return true;
3895 /* Determines cost of the computation of EXPR. */
3897 static unsigned
3898 computation_cost (tree expr, bool speed)
3900 rtx_insn *seq;
3901 rtx rslt;
3902 tree type = TREE_TYPE (expr);
3903 unsigned cost;
3904 /* Avoid using hard regs in ways which may be unsupported. */
3905 int regno = LAST_VIRTUAL_REGISTER + 1;
3906 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3907 enum node_frequency real_frequency = node->frequency;
3909 node->frequency = NODE_FREQUENCY_NORMAL;
3910 crtl->maybe_hot_insn_p = speed;
3911 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3912 start_sequence ();
3913 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3914 seq = get_insns ();
3915 end_sequence ();
3916 default_rtl_profile ();
3917 node->frequency = real_frequency;
3919 cost = seq_cost (seq, speed);
3920 if (MEM_P (rslt))
3921 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3922 TYPE_ADDR_SPACE (type), speed);
3923 else if (!REG_P (rslt))
3924 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3926 return cost;
3929 /* Returns variable containing the value of candidate CAND at statement AT. */
3931 static tree
3932 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3934 if (stmt_after_increment (loop, cand, stmt))
3935 return cand->var_after;
3936 else
3937 return cand->var_before;
3940 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3941 same precision that is at least as wide as the precision of TYPE, stores
3942 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3943 type of A and B. */
3945 static tree
3946 determine_common_wider_type (tree *a, tree *b)
3948 tree wider_type = NULL;
3949 tree suba, subb;
3950 tree atype = TREE_TYPE (*a);
3952 if (CONVERT_EXPR_P (*a))
3954 suba = TREE_OPERAND (*a, 0);
3955 wider_type = TREE_TYPE (suba);
3956 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3957 return atype;
3959 else
3960 return atype;
3962 if (CONVERT_EXPR_P (*b))
3964 subb = TREE_OPERAND (*b, 0);
3965 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3966 return atype;
3968 else
3969 return atype;
3971 *a = suba;
3972 *b = subb;
3973 return wider_type;
3976 /* Determines the expression by which USE is expressed from induction variable
3977 CAND at statement AT in LOOP. The expression is stored in two parts in a
3978 decomposed form. The invariant part is stored in AFF_INV, while the variant
3979 part is stored in AFF_VAR. Store the ratio of USE.step over CAND.step in
3980 PRAT if it's non-null. Returns false if USE cannot be expressed using CAND. */
3982 static bool
3983 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3984 struct iv_cand *cand, class aff_tree *aff_inv,
3985 class aff_tree *aff_var, widest_int *prat = NULL)
3987 tree ubase = use->iv->base, ustep = use->iv->step;
3988 tree cbase = cand->iv->base, cstep = cand->iv->step;
3989 tree common_type, uutype, var, cstep_common;
3990 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3991 aff_tree aff_cbase;
3992 widest_int rat;
3994 /* We must have a precision to express the values of use. */
3995 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3996 return false;
3998 var = var_at_stmt (loop, cand, at);
3999 uutype = unsigned_type_for (utype);
4001 /* If the conversion is not noop, perform it. */
4002 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4004 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4005 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4007 tree inner_base, inner_step, inner_type;
4008 inner_base = TREE_OPERAND (cbase, 0);
4009 if (CONVERT_EXPR_P (cstep))
4010 inner_step = TREE_OPERAND (cstep, 0);
4011 else
4012 inner_step = cstep;
4014 inner_type = TREE_TYPE (inner_base);
4015 /* If candidate is added from a biv whose type is smaller than
4016 ctype, we know both candidate and the biv won't overflow.
4017 In this case, it's safe to skip the conversion in the candidate.
4018 As an example, (unsigned short)((unsigned long)A) is equal to
4019 (unsigned short)A, if A has a type no larger than short. */
4020 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4022 cbase = inner_base;
4023 cstep = inner_step;
4026 cbase = fold_convert (uutype, cbase);
4027 cstep = fold_convert (uutype, cstep);
4028 var = fold_convert (uutype, var);
4031 /* Ratio is 1 when computing the value of biv cand by itself.
4032 We can't rely on constant_multiple_of in this case because the
4033 use is created after the original biv is selected. The call
4034 could fail because of inconsistent fold behavior. See PR68021
4035 for more information. */
4036 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4038 gcc_assert (is_gimple_assign (use->stmt));
4039 gcc_assert (use->iv->ssa_name == cand->var_after);
4040 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4041 rat = 1;
4043 else if (!constant_multiple_of (ustep, cstep, &rat))
4044 return false;
4046 if (prat)
4047 *prat = rat;
4049 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4050 type, we achieve better folding by computing their difference in this
4051 wider type, and casting the result to UUTYPE.  We do not need to worry about
4052 overflows, as all the arithmetic will in the end be performed in UUTYPE
4053 anyway. */
4054 common_type = determine_common_wider_type (&ubase, &cbase);
4056 /* use = ubase - ratio * cbase + ratio * var. */
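  /* E.g. (illustrative values): for a USE with base 0 and step 4 expressed
     from a CAND with base 0 and step 1, the ratio is 4 and the value of the
     use is simply 4 * var.  */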
4057 tree_to_aff_combination (ubase, common_type, aff_inv);
4058 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4059 tree_to_aff_combination (var, uutype, aff_var);
4061 /* We need to shift the value if we are after the increment. */
4062 if (stmt_after_increment (loop, cand, at))
4064 aff_tree cstep_aff;
4066 if (common_type != uutype)
4067 cstep_common = fold_convert (common_type, cstep);
4068 else
4069 cstep_common = cstep;
4071 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4072 aff_combination_add (&aff_cbase, &cstep_aff);
4075 aff_combination_scale (&aff_cbase, -rat);
4076 aff_combination_add (aff_inv, &aff_cbase);
4077 if (common_type != uutype)
4078 aff_combination_convert (aff_inv, uutype);
4080 aff_combination_scale (aff_var, rat);
4081 return true;
4084 /* Determines the expression by which USE is expressed from induction variable
4085 CAND at statement AT in LOOP. The expression is stored in a decomposed
4086 form into AFF. Returns false if USE cannot be expressed using CAND. */
4088 static bool
4089 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4090 struct iv_cand *cand, class aff_tree *aff)
4092 aff_tree aff_var;
4094 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4095 return false;
4097 aff_combination_add (aff, &aff_var);
4098 return true;
4101 /* Return the type of USE. */
4103 static tree
4104 get_use_type (struct iv_use *use)
4106 tree base_type = TREE_TYPE (use->iv->base);
4107 tree type;
4109 if (use->type == USE_REF_ADDRESS)
4111 /* The base_type may be a void pointer. Create a pointer type based on
4112 the mem_ref instead. */
4113 type = build_pointer_type (TREE_TYPE (*use->op_p));
4114 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4115 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4117 else
4118 type = base_type;
4120 return type;
4123 /* Determines the expression by which USE is expressed from induction variable
4124 CAND at statement AT in LOOP. The computation is unshared. */
4126 static tree
4127 get_computation_at (class loop *loop, gimple *at,
4128 struct iv_use *use, struct iv_cand *cand)
4130 aff_tree aff;
4131 tree type = get_use_type (use);
4133 if (!get_computation_aff (loop, at, use, cand, &aff))
4134 return NULL_TREE;
4135 unshare_aff_combination (&aff);
4136 return fold_convert (type, aff_combination_to_tree (&aff));
4139 /* Like get_computation_at, but try harder, even if the computation
4140 is more expensive. Intended for debug stmts. */
4142 static tree
4143 get_debug_computation_at (class loop *loop, gimple *at,
4144 struct iv_use *use, struct iv_cand *cand)
4146 if (tree ret = get_computation_at (loop, at, use, cand))
4147 return ret;
4149 tree ubase = use->iv->base, ustep = use->iv->step;
4150 tree cbase = cand->iv->base, cstep = cand->iv->step;
4151 tree var;
4152 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4153 widest_int rat;
4155 /* We must have a precision to express the values of use. */
4156 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4157 return NULL_TREE;
4159 /* Try to handle the case that get_computation_at doesn't,
4160 try to express
4161 use = ubase + (var - cbase) / ratio. */
4162 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4163 &rat))
4164 return NULL_TREE;
4166 bool neg_p = false;
4167 if (wi::neg_p (rat))
4169 if (TYPE_UNSIGNED (ctype))
4170 return NULL_TREE;
4171 neg_p = true;
4172 rat = wi::neg (rat);
4175 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4176 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4177 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4178 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4179 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4180 if (!use->iv->no_overflow
4181 && !cand->iv->no_overflow
4182 && !integer_pow2p (cstep))
4183 return NULL_TREE;
4185 int bits = wi::exact_log2 (rat);
4186 if (bits == -1)
4187 bits = wi::floor_log2 (rat) + 1;
4188 if (!cand->iv->no_overflow
4189 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4190 return NULL_TREE;
4192 var = var_at_stmt (loop, cand, at);
4194 if (POINTER_TYPE_P (ctype))
4196 ctype = unsigned_type_for (ctype);
4197 cbase = fold_convert (ctype, cbase);
4198 cstep = fold_convert (ctype, cstep);
4199 var = fold_convert (ctype, var);
4202 if (stmt_after_increment (loop, cand, at))
4203 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4204 unshare_expr (cstep));
4206 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4207 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4208 wide_int_to_tree (TREE_TYPE (var), rat));
4209 if (POINTER_TYPE_P (utype))
4211 var = fold_convert (sizetype, var);
4212 if (neg_p)
4213 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4214 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4216 else
4218 var = fold_convert (utype, var);
4219 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4220 ubase, var);
4222 return var;
4225 /* Adjust the cost COST for being in loop setup rather than loop body.
4226 If we're optimizing for space, the loop setup overhead is constant;
4227 if we're optimizing for speed, amortize it over the per-iteration cost.
4228 If ROUND_UP_P is true, the result is rounded up rather than truncated
4229 toward zero when optimizing for speed.  */
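/* A hypothetical illustration: with an average of 10 iterations and a setup
   cost of 25, the speed-amortized cost is 25 / 10 = 2 when truncating, or
   (25 + 10 - 1) / 10 = 3 when ROUND_UP_P is true.  */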
4230 static int64_t
4231 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4232 bool round_up_p = false)
4234 if (cost == INFTY)
4235 return cost;
4236 else if (optimize_loop_for_speed_p (data->current_loop))
4238 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4239 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4241 else
4242 return cost;
4245 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4246 EXPR operand holding the shift. COST0 and COST1 are the costs for
4247 calculating the operands of EXPR. Returns true if successful, and returns
4248 the cost in COST. */
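/* For example, on targets with a cheap shift-and-add instruction (x86 LEA
   with a scaled index, or an AArch64 add with a shifted register operand),
   an expression like b + a * 8 can be a single instruction rather than a
   separate shift followed by an add.  */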
4250 static bool
4251 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4252 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4254 comp_cost res;
4255 tree op1 = TREE_OPERAND (expr, 1);
4256 tree cst = TREE_OPERAND (mult, 1);
4257 tree multop = TREE_OPERAND (mult, 0);
4258 int m = exact_log2 (int_cst_value (cst));
4259 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4260 int as_cost, sa_cost;
4261 bool mult_in_op1;
4263 if (!(m >= 0 && m < maxm))
4264 return false;
4266 STRIP_NOPS (op1);
4267 mult_in_op1 = operand_equal_p (op1, mult, 0);
4269 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4271 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4272 use that in preference to a shift insn followed by an add insn. */
4273 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4274 ? shiftadd_cost (speed, mode, m)
4275 : (mult_in_op1
4276 ? shiftsub1_cost (speed, mode, m)
4277 : shiftsub0_cost (speed, mode, m)));
4279 res = comp_cost (MIN (as_cost, sa_cost), 0);
4280 res += (mult_in_op1 ? cost0 : cost1);
4282 STRIP_NOPS (multop);
4283 if (!is_gimple_val (multop))
4284 res += force_expr_to_var_cost (multop, speed);
4286 *cost = res;
4287 return true;
4290 /* Estimates cost of forcing expression EXPR into a variable. */
4292 static comp_cost
4293 force_expr_to_var_cost (tree expr, bool speed)
4295 static bool costs_initialized = false;
4296 static unsigned integer_cost [2];
4297 static unsigned symbol_cost [2];
4298 static unsigned address_cost [2];
4299 tree op0, op1;
4300 comp_cost cost0, cost1, cost;
4301 machine_mode mode;
4302 scalar_int_mode int_mode;
4304 if (!costs_initialized)
4306 tree type = build_pointer_type (integer_type_node);
4307 tree var, addr;
4308 rtx x;
4309 int i;
4311 var = create_tmp_var_raw (integer_type_node, "test_var");
4312 TREE_STATIC (var) = 1;
4313 x = produce_memory_decl_rtl (var, NULL);
4314 SET_DECL_RTL (var, x);
4316 addr = build1 (ADDR_EXPR, type, var);
4319 for (i = 0; i < 2; i++)
4321 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4322 2000), i);
4324 symbol_cost[i] = computation_cost (addr, i) + 1;
4326 address_cost[i]
4327 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4328 if (dump_file && (dump_flags & TDF_DETAILS))
4330 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4331 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4332 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4333 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4334 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4335 fprintf (dump_file, "\n");
4339 costs_initialized = true;
4342 STRIP_NOPS (expr);
4344 if (SSA_VAR_P (expr))
4345 return no_cost;
4347 if (is_gimple_min_invariant (expr))
4349 if (poly_int_tree_p (expr))
4350 return comp_cost (integer_cost [speed], 0);
4352 if (TREE_CODE (expr) == ADDR_EXPR)
4354 tree obj = TREE_OPERAND (expr, 0);
4356 if (VAR_P (obj)
4357 || TREE_CODE (obj) == PARM_DECL
4358 || TREE_CODE (obj) == RESULT_DECL)
4359 return comp_cost (symbol_cost [speed], 0);
4362 return comp_cost (address_cost [speed], 0);
4365 switch (TREE_CODE (expr))
4367 case POINTER_PLUS_EXPR:
4368 case PLUS_EXPR:
4369 case MINUS_EXPR:
4370 case MULT_EXPR:
4371 case EXACT_DIV_EXPR:
4372 case TRUNC_DIV_EXPR:
4373 case BIT_AND_EXPR:
4374 case BIT_IOR_EXPR:
4375 case LSHIFT_EXPR:
4376 case RSHIFT_EXPR:
4377 op0 = TREE_OPERAND (expr, 0);
4378 op1 = TREE_OPERAND (expr, 1);
4379 STRIP_NOPS (op0);
4380 STRIP_NOPS (op1);
4381 break;
4383 CASE_CONVERT:
4384 case NEGATE_EXPR:
4385 case BIT_NOT_EXPR:
4386 op0 = TREE_OPERAND (expr, 0);
4387 STRIP_NOPS (op0);
4388 op1 = NULL_TREE;
4389 break;
4390 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4391 introduce a COND_EXPR for the IV base, so COND_EXPR and tcc_comparison
4392 codes need cost estimation support here; the estimates could be improved.  */
4393 case COND_EXPR:
4394 op0 = TREE_OPERAND (expr, 1);
4395 STRIP_NOPS (op0);
4396 op1 = TREE_OPERAND (expr, 2);
4397 STRIP_NOPS (op1);
4398 break;
4399 case LT_EXPR:
4400 case LE_EXPR:
4401 case GT_EXPR:
4402 case GE_EXPR:
4403 case EQ_EXPR:
4404 case NE_EXPR:
4405 case UNORDERED_EXPR:
4406 case ORDERED_EXPR:
4407 case UNLT_EXPR:
4408 case UNLE_EXPR:
4409 case UNGT_EXPR:
4410 case UNGE_EXPR:
4411 case UNEQ_EXPR:
4412 case LTGT_EXPR:
4413 case MAX_EXPR:
4414 case MIN_EXPR:
4415 op0 = TREE_OPERAND (expr, 0);
4416 STRIP_NOPS (op0);
4417 op1 = TREE_OPERAND (expr, 1);
4418 STRIP_NOPS (op1);
4419 break;
4421 default:
4422 /* Just an arbitrary value, FIXME. */
4423 return comp_cost (target_spill_cost[speed], 0);
4426 if (op0 == NULL_TREE
4427 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4428 cost0 = no_cost;
4429 else
4430 cost0 = force_expr_to_var_cost (op0, speed);
4432 if (op1 == NULL_TREE
4433 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4434 cost1 = no_cost;
4435 else
4436 cost1 = force_expr_to_var_cost (op1, speed);
4438 mode = TYPE_MODE (TREE_TYPE (expr));
4439 switch (TREE_CODE (expr))
4441 case POINTER_PLUS_EXPR:
4442 case PLUS_EXPR:
4443 case MINUS_EXPR:
4444 case NEGATE_EXPR:
4445 cost = comp_cost (add_cost (speed, mode), 0);
4446 if (TREE_CODE (expr) != NEGATE_EXPR)
4448 tree mult = NULL_TREE;
4449 comp_cost sa_cost;
4450 if (TREE_CODE (op1) == MULT_EXPR)
4451 mult = op1;
4452 else if (TREE_CODE (op0) == MULT_EXPR)
4453 mult = op0;
4455 if (mult != NULL_TREE
4456 && is_a <scalar_int_mode> (mode, &int_mode)
4457 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4458 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4459 speed, &sa_cost))
4460 return sa_cost;
4462 break;
4464 CASE_CONVERT:
4466 tree inner_mode, outer_mode;
4467 outer_mode = TREE_TYPE (expr);
4468 inner_mode = TREE_TYPE (op0);
4469 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4470 TYPE_MODE (inner_mode), speed), 0);
4472 break;
4474 case MULT_EXPR:
4475 if (cst_and_fits_in_hwi (op0))
4476 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4477 mode, speed), 0);
4478 else if (cst_and_fits_in_hwi (op1))
4479 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4480 mode, speed), 0);
4481 else
4482 return comp_cost (target_spill_cost [speed], 0);
4483 break;
4485 case EXACT_DIV_EXPR:
4486 case TRUNC_DIV_EXPR:
4487 /* Division by power of two is usually cheap, so we allow it. Forbid
4488 anything else. */
4489 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4490 cost = comp_cost (add_cost (speed, mode), 0);
4491 else
4492 cost = comp_cost (target_spill_cost[speed], 0);
4493 break;
4495 case BIT_AND_EXPR:
4496 case BIT_IOR_EXPR:
4497 case BIT_NOT_EXPR:
4498 case LSHIFT_EXPR:
4499 case RSHIFT_EXPR:
4500 cost = comp_cost (add_cost (speed, mode), 0);
4501 break;
4502 case COND_EXPR:
4503 op0 = TREE_OPERAND (expr, 0);
4504 STRIP_NOPS (op0);
4505 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4506 || CONSTANT_CLASS_P (op0))
4507 cost = no_cost;
4508 else
4509 cost = force_expr_to_var_cost (op0, speed);
4510 break;
4511 case LT_EXPR:
4512 case LE_EXPR:
4513 case GT_EXPR:
4514 case GE_EXPR:
4515 case EQ_EXPR:
4516 case NE_EXPR:
4517 case UNORDERED_EXPR:
4518 case ORDERED_EXPR:
4519 case UNLT_EXPR:
4520 case UNLE_EXPR:
4521 case UNGT_EXPR:
4522 case UNGE_EXPR:
4523 case UNEQ_EXPR:
4524 case LTGT_EXPR:
4525 case MAX_EXPR:
4526 case MIN_EXPR:
4527 /* Simply use the add cost for now; FIXME: use a more accurate cost
4528 evaluation if one becomes available.  */
4529 cost = comp_cost (add_cost (speed, mode), 0);
4530 break;
4532 default:
4533 gcc_unreachable ();
4536 cost += cost0;
4537 cost += cost1;
4538 return cost;
4541 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4542 invariants the computation depends on. */
4544 static comp_cost
4545 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4547 if (!expr)
4548 return no_cost;
4550 find_inv_vars (data, &expr, inv_vars);
4551 return force_expr_to_var_cost (expr, data->speed);
4554 /* Returns cost of auto-modifying address expression in shape base + offset.
4555 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4556 address expression. The address expression has ADDR_MODE in addr space
4557 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4558 speed or size. */
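/* For example, a load from *p followed by advancing p by the access size
   corresponds to a post-increment address, while advancing p first and then
   loading corresponds to a pre-increment address; the matching target costs
   are looked up below.  */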
4560 enum ainc_type
4562 AINC_PRE_INC, /* Pre increment. */
4563 AINC_PRE_DEC, /* Pre decrement. */
4564 AINC_POST_INC, /* Post increment. */
4565 AINC_POST_DEC, /* Post decrement. */
4566 AINC_NONE /* Also the number of auto increment types. */
4569 struct ainc_cost_data
4571 int64_t costs[AINC_NONE];
4574 static comp_cost
4575 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4576 machine_mode addr_mode, machine_mode mem_mode,
4577 addr_space_t as, bool speed)
4579 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4580 && !USE_STORE_PRE_DECREMENT (mem_mode)
4581 && !USE_LOAD_POST_DECREMENT (mem_mode)
4582 && !USE_STORE_POST_DECREMENT (mem_mode)
4583 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4584 && !USE_STORE_PRE_INCREMENT (mem_mode)
4585 && !USE_LOAD_POST_INCREMENT (mem_mode)
4586 && !USE_STORE_POST_INCREMENT (mem_mode))
4587 return infinite_cost;
4589 static vec<ainc_cost_data *> ainc_cost_data_list;
4590 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4591 if (idx >= ainc_cost_data_list.length ())
4593 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4595 gcc_assert (nsize > idx);
4596 ainc_cost_data_list.safe_grow_cleared (nsize, true);
4599 ainc_cost_data *data = ainc_cost_data_list[idx];
4600 if (data == NULL)
4602 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4604 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4605 data->costs[AINC_PRE_DEC] = INFTY;
4606 data->costs[AINC_POST_DEC] = INFTY;
4607 data->costs[AINC_PRE_INC] = INFTY;
4608 data->costs[AINC_POST_INC] = INFTY;
4609 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4610 || USE_STORE_PRE_DECREMENT (mem_mode))
4612 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4614 if (memory_address_addr_space_p (mem_mode, addr, as))
4615 data->costs[AINC_PRE_DEC]
4616 = address_cost (addr, mem_mode, as, speed);
4618 if (USE_LOAD_POST_DECREMENT (mem_mode)
4619 || USE_STORE_POST_DECREMENT (mem_mode))
4621 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4623 if (memory_address_addr_space_p (mem_mode, addr, as))
4624 data->costs[AINC_POST_DEC]
4625 = address_cost (addr, mem_mode, as, speed);
4627 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4628 || USE_STORE_PRE_INCREMENT (mem_mode))
4630 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4632 if (memory_address_addr_space_p (mem_mode, addr, as))
4633 data->costs[AINC_PRE_INC]
4634 = address_cost (addr, mem_mode, as, speed);
4636 if (USE_LOAD_POST_INCREMENT (mem_mode)
4637 || USE_STORE_POST_INCREMENT (mem_mode))
4639 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4641 if (memory_address_addr_space_p (mem_mode, addr, as))
4642 data->costs[AINC_POST_INC]
4643 = address_cost (addr, mem_mode, as, speed);
4645 ainc_cost_data_list[idx] = data;
4648 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4649 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4650 return comp_cost (data->costs[AINC_POST_INC], 0);
4651 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4652 return comp_cost (data->costs[AINC_POST_DEC], 0);
4653 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4654 return comp_cost (data->costs[AINC_PRE_INC], 0);
4655 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4656 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4658 return infinite_cost;
4661 /* Return cost of computing USE's address expression by using CAND.
4662 AFF_INV and AFF_VAR represent invariant and variant parts of the
4663 address expression, respectively. If AFF_INV is simple, store
4664 the loop invariant variables on which it depends in INV_VARS;
4665 if AFF_INV is complicated, handle it as a new invariant expression
4666 and record it in INV_EXPR.  RATIO is the ratio between the
4667 steps of USE and CAND.  If CAN_AUTOINC is non-NULL, store a boolean
4668 value to it indicating if this is an auto-increment address. */
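/* As a rough illustration, for a use like a[i] with a loop-invariant array a
   and candidate i, the variant part is the candidate scaled by the element
   size and the invariant part is the base address; on targets that support a
   "base + index << scale" addressing mode the whole expression can be encoded
   in the memory operand, so only the address cost is paid.  */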
4670 static comp_cost
4671 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4672 struct iv_cand *cand, aff_tree *aff_inv,
4673 aff_tree *aff_var, HOST_WIDE_INT ratio,
4674 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4675 bool *can_autoinc, bool speed)
4677 rtx addr;
4678 bool simple_inv = true;
4679 tree comp_inv = NULL_TREE, type = aff_var->type;
4680 comp_cost var_cost = no_cost, cost = no_cost;
4681 struct mem_address parts = {NULL_TREE, integer_one_node,
4682 NULL_TREE, NULL_TREE, NULL_TREE};
4683 machine_mode addr_mode = TYPE_MODE (type);
4684 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4685 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4686 /* Only true if ratio != 1. */
4687 bool ok_with_ratio_p = false;
4688 bool ok_without_ratio_p = false;
4689 code_helper code = ERROR_MARK;
4691 if (use->type == USE_PTR_ADDRESS)
4693 gcall *call = as_a<gcall *> (use->stmt);
4694 gcc_assert (gimple_call_internal_p (call));
4695 code = gimple_call_internal_fn (call);
4698 if (!aff_combination_const_p (aff_inv))
4700 parts.index = integer_one_node;
4701 /* Addressing mode "base + index". */
4702 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4703 if (ratio != 1)
4705 parts.step = wide_int_to_tree (type, ratio);
4706 /* Addressing mode "base + index << scale". */
4707 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4708 if (!ok_with_ratio_p)
4709 parts.step = NULL_TREE;
4711 if (ok_with_ratio_p || ok_without_ratio_p)
4713 if (maybe_ne (aff_inv->offset, 0))
4715 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4716 /* Addressing mode "base + index [<< scale] + offset". */
4717 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4718 parts.offset = NULL_TREE;
4719 else
4720 aff_inv->offset = 0;
4723 move_fixed_address_to_symbol (&parts, aff_inv);
4724 /* Base is fixed address and is moved to symbol part. */
4725 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4726 parts.base = NULL_TREE;
4728 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4729 if (parts.symbol != NULL_TREE
4730 && !valid_mem_ref_p (mem_mode, as, &parts, code))
4732 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4733 parts.symbol = NULL_TREE;
4734 /* Reset SIMPLE_INV since symbol address needs to be computed
4735 outside of address expression in this case. */
4736 simple_inv = false;
4737 /* Symbol part is moved back to the base part; it can't be NULL.  */
4738 parts.base = integer_one_node;
4741 else
4742 parts.index = NULL_TREE;
4744 else
4746 poly_int64 ainc_step;
4747 if (can_autoinc
4748 && ratio == 1
4749 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4751 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4753 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4754 ainc_offset += ainc_step;
4755 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4756 addr_mode, mem_mode, as, speed);
4757 if (!cost.infinite_cost_p ())
4759 *can_autoinc = true;
4760 return cost;
4762 cost = no_cost;
4764 if (!aff_combination_zero_p (aff_inv))
4766 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4767 /* Addressing mode "base + offset". */
4768 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4769 parts.offset = NULL_TREE;
4770 else
4771 aff_inv->offset = 0;
4775 if (simple_inv)
4776 simple_inv = (aff_inv == NULL
4777 || aff_combination_const_p (aff_inv)
4778 || aff_combination_singleton_var_p (aff_inv));
4779 if (!aff_combination_zero_p (aff_inv))
4780 comp_inv = aff_combination_to_tree (aff_inv);
4781 if (comp_inv != NULL_TREE)
4782 cost = force_var_cost (data, comp_inv, inv_vars);
4783 if (ratio != 1 && parts.step == NULL_TREE)
4784 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4785 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4786 var_cost += add_cost (speed, addr_mode);
4788 if (comp_inv && inv_expr && !simple_inv)
4790 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4791 /* Clear depends on. */
4792 if (*inv_expr != NULL && inv_vars && *inv_vars)
4793 bitmap_clear (*inv_vars);
4795 /* The cost of a small invariant expression adjusted against loop niters
4796 is usually zero, which makes it hard to differentiate from a candidate
4797 based on loop invariant variables.  Secondly, the generated invariant
4798 expression may not be hoisted out of the loop by a following pass.  We
4799 penalize the cost by rounding up in order to
4800 neutralize such effects.  */
4801 cost.cost = adjust_setup_cost (data, cost.cost, true);
4802 cost.scratch = cost.cost;
4805 cost += var_cost;
4806 addr = addr_for_mem_ref (&parts, as, false);
4807 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4808 cost += address_cost (addr, mem_mode, as, speed);
4810 if (parts.symbol != NULL_TREE)
4811 cost.complexity += 1;
4812 /* Don't increase the complexity of adding a scaled index if it's
4813 the only kind of index that the target allows. */
4814 if (parts.step != NULL_TREE && ok_without_ratio_p)
4815 cost.complexity += 1;
4816 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4817 cost.complexity += 1;
4818 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4819 cost.complexity += 1;
4821 return cost;
4824 /* Scale (multiply) the computed COST (except the scratch part, which should
4825 be hoisted out of the loop) by header->frequency / AT->frequency, which
4826 makes the expected cost more accurate.  */
4828 static comp_cost
4829 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4831 if (data->speed
4832 && data->current_loop->header->count.to_frequency (cfun) > 0)
4834 basic_block bb = gimple_bb (at);
4835 gcc_assert (cost.scratch <= cost.cost);
4836 int scale_factor = (int)(intptr_t) bb->aux;
4837 if (scale_factor == 1)
4838 return cost;
4840 int64_t scaled_cost
4841 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4843 if (dump_file && (dump_flags & TDF_DETAILS))
4844 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4845 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4846 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4848 cost.cost = scaled_cost;
4851 return cost;
4854 /* Determines the cost of the computation by which USE is expressed
4855 from induction variable CAND.  If ADDRESS_P is true, we just need
4856 to create an address from it, otherwise we want to get it into a
4857 register.  A set of invariants we depend on is stored in INV_VARS.
4858 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4859 addressing is likely. If INV_EXPR is nonnull, record invariant
4860 expr entry in it. */
4862 static comp_cost
4863 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4864 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4865 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4867 gimple *at = use->stmt;
4868 tree ubase = use->iv->base, cbase = cand->iv->base;
4869 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4870 tree comp_inv = NULL_TREE;
4871 HOST_WIDE_INT ratio, aratio;
4872 comp_cost cost;
4873 widest_int rat;
4874 aff_tree aff_inv, aff_var;
4875 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4877 if (inv_vars)
4878 *inv_vars = NULL;
4879 if (can_autoinc)
4880 *can_autoinc = false;
4881 if (inv_expr)
4882 *inv_expr = NULL;
4884 /* Check if we have enough precision to express the values of use. */
4885 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4886 return infinite_cost;
4888 if (address_p
4889 || (use->iv->base_object
4890 && cand->iv->base_object
4891 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4892 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4894 /* Do not try to express address of an object with computation based
4895 on address of a different object. This may cause problems in rtl
4896 level alias analysis (that does not expect this to be happening,
4897 as this is illegal in C), and would be unlikely to be useful
4898 anyway. */
4899 if (use->iv->base_object
4900 && cand->iv->base_object
4901 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4902 return infinite_cost;
4905 if (!get_computation_aff_1 (data->current_loop, at, use,
4906 cand, &aff_inv, &aff_var, &rat)
4907 || !wi::fits_shwi_p (rat))
4908 return infinite_cost;
4910 ratio = rat.to_shwi ();
4911 if (address_p)
4913 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4914 inv_vars, inv_expr, can_autoinc, speed);
4915 cost = get_scaled_computation_cost_at (data, at, cost);
4916 /* For doloop IV cand, add on the extra cost. */
4917 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4918 return cost;
4921 bool simple_inv = (aff_combination_const_p (&aff_inv)
4922 || aff_combination_singleton_var_p (&aff_inv));
4923 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4924 aff_combination_convert (&aff_inv, signed_type);
4925 if (!aff_combination_zero_p (&aff_inv))
4926 comp_inv = aff_combination_to_tree (&aff_inv);
4928 cost = force_var_cost (data, comp_inv, inv_vars);
4929 if (comp_inv && inv_expr && !simple_inv)
4931 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4932 /* Clear depends on. */
4933 if (*inv_expr != NULL && inv_vars && *inv_vars)
4934 bitmap_clear (*inv_vars);
4936 cost.cost = adjust_setup_cost (data, cost.cost);
4937 /* Record setup cost in scratch field. */
4938 cost.scratch = cost.cost;
4940 /* Cost of constant integer can be covered when adding invariant part to
4941 variant part. */
4942 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4943 cost = no_cost;
4945 /* Need type narrowing to represent use with cand. */
4946 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4948 machine_mode outer_mode = TYPE_MODE (utype);
4949 machine_mode inner_mode = TYPE_MODE (ctype);
4950 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4953 /* Turn a + i * (-c) into a - i * c. */
4954 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4955 aratio = -ratio;
4956 else
4957 aratio = ratio;
4959 if (ratio != 1)
4960 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4962 /* TODO: We may also need to check if we can compute a + i * 4 in one
4963 instruction. */
4964 /* Need to add up the invariant and variant parts. */
4965 if (comp_inv && !integer_zerop (comp_inv))
4966 cost += add_cost (speed, TYPE_MODE (utype));
4968 cost = get_scaled_computation_cost_at (data, at, cost);
4970 /* For doloop IV cand, add on the extra cost. */
4971 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4972 cost += targetm.doloop_cost_for_generic;
4974 return cost;
4977 /* Determines cost of computing the use in GROUP with CAND in a generic
4978 expression. */
4980 static bool
4981 determine_group_iv_cost_generic (struct ivopts_data *data,
4982 struct iv_group *group, struct iv_cand *cand)
4984 comp_cost cost;
4985 iv_inv_expr_ent *inv_expr = NULL;
4986 bitmap inv_vars = NULL, inv_exprs = NULL;
4987 struct iv_use *use = group->vuses[0];
4989 /* The simple case first -- if we need to express value of the preserved
4990 original biv, the cost is 0. This also prevents us from counting the
4991 cost of increment twice -- once at this use and once in the cost of
4992 the candidate. */
4993 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4994 cost = no_cost;
4995 /* If the IV candidate involves undefined SSA values and is not the
4996 same IV as on the USE avoid using that candidate here. */
4997 else if (cand->involves_undefs
4998 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4999 return false;
5000 else
5001 cost = get_computation_cost (data, use, cand, false,
5002 &inv_vars, NULL, &inv_expr);
5004 if (inv_expr)
5006 inv_exprs = BITMAP_ALLOC (NULL);
5007 bitmap_set_bit (inv_exprs, inv_expr->id);
5009 set_group_iv_cost (data, group, cand, cost, inv_vars,
5010 NULL_TREE, ERROR_MARK, inv_exprs);
5011 return !cost.infinite_cost_p ();
5014 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5016 static bool
5017 determine_group_iv_cost_address (struct ivopts_data *data,
5018 struct iv_group *group, struct iv_cand *cand)
5020 unsigned i;
5021 bitmap inv_vars = NULL, inv_exprs = NULL;
5022 bool can_autoinc;
5023 iv_inv_expr_ent *inv_expr = NULL;
5024 struct iv_use *use = group->vuses[0];
5025 comp_cost sum_cost = no_cost, cost;
5027 cost = get_computation_cost (data, use, cand, true,
5028 &inv_vars, &can_autoinc, &inv_expr);
5030 if (inv_expr)
5032 inv_exprs = BITMAP_ALLOC (NULL);
5033 bitmap_set_bit (inv_exprs, inv_expr->id);
5035 sum_cost = cost;
5036 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5038 if (can_autoinc)
5039 sum_cost -= cand->cost_step;
5040 /* If we generated the candidate solely for exploiting autoincrement
5041 opportunities, and it turns out it can't be used, set the cost to
5042 infinity to make sure we ignore it. */
5043 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5044 sum_cost = infinite_cost;
5047 /* Uses in a group can share setup code, so only add setup cost once. */
5048 cost -= cost.scratch;
5049 /* Compute and add costs for the remaining uses of this group.  */
5050 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5052 struct iv_use *next = group->vuses[i];
5054 /* TODO: We could skip computing cost for sub iv_use when it has the
5055 same cost as the first iv_use, but the cost really depends on the
5056 offset and where the iv_use is. */
5057 cost = get_computation_cost (data, next, cand, true,
5058 NULL, &can_autoinc, &inv_expr);
5059 if (inv_expr)
5061 if (!inv_exprs)
5062 inv_exprs = BITMAP_ALLOC (NULL);
5064 bitmap_set_bit (inv_exprs, inv_expr->id);
5066 sum_cost += cost;
5068 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5069 NULL_TREE, ERROR_MARK, inv_exprs);
5071 return !sum_cost.infinite_cost_p ();
5074 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5075 and stores it to VAL. */
5077 static void
5078 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5079 class tree_niter_desc *desc, aff_tree *val)
5081 aff_tree step, delta, nit;
5082 struct iv *iv = cand->iv;
5083 tree type = TREE_TYPE (iv->base);
5084 tree niter = desc->niter;
5085 bool after_adjust = stmt_after_increment (loop, cand, at);
5086 tree steptype;
5088 if (POINTER_TYPE_P (type))
5089 steptype = sizetype;
5090 else
5091 steptype = unsigned_type_for (type);
5093 /* If AFTER_ADJUST is required, the code below generates the equivalent
5094 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5095 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5096 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5097 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5098 class for common idioms that we know are safe. */
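  /* For instance, if the control IV starts at 1, steps by 1 and the exit
     test is i < n, then NITER is n - 1 and the code below can use the bound
     n directly, computing BASE + n * STEP rather than
     BASE + (n - 1) * STEP + STEP, which folds better.  */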
5099 if (after_adjust
5100 && desc->control.no_overflow
5101 && integer_onep (desc->control.step)
5102 && (desc->cmp == LT_EXPR
5103 || desc->cmp == NE_EXPR)
5104 && TREE_CODE (desc->bound) == SSA_NAME)
5106 if (integer_onep (desc->control.base))
5108 niter = desc->bound;
5109 after_adjust = false;
5111 else if (TREE_CODE (niter) == MINUS_EXPR
5112 && integer_onep (TREE_OPERAND (niter, 1)))
5114 niter = TREE_OPERAND (niter, 0);
5115 after_adjust = false;
5119 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5120 aff_combination_convert (&step, steptype);
5121 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5122 aff_combination_convert (&nit, steptype);
5123 aff_combination_mult (&nit, &step, &delta);
5124 if (after_adjust)
5125 aff_combination_add (&delta, &step);
5127 tree_to_aff_combination (iv->base, type, val);
5128 if (!POINTER_TYPE_P (type))
5129 aff_combination_convert (val, steptype);
5130 aff_combination_add (val, &delta);
5133 /* Returns period of induction variable iv. */
5135 static tree
5136 iv_period (struct iv *iv)
5138 tree step = iv->step, period, type;
5139 tree pow2div;
5141 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5143 type = unsigned_type_for (TREE_TYPE (step));
5144 /* The period of the iv is lcm (step, type_range) / step - 1,
5145 i.e., N * type_range / step - 1.  Since type_range is a power
5146 of two, N == step >> num_of_ending_zeros_binary (step),
5147 so the final result is
5149 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
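  /* E.g. for a 16-bit unsigned type and step 8 (three trailing zero bits),
     the period is (65536 >> 3) - 1 = 8191.  */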
5152 pow2div = num_ending_zeros (step);
5154 period = build_low_bits_mask (type,
5155 (TYPE_PRECISION (type)
5156 - tree_to_uhwi (pow2div)));
5158 return period;
5161 /* Returns the comparison operator used when eliminating the iv USE. */
5163 static enum tree_code
5164 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5166 class loop *loop = data->current_loop;
5167 basic_block ex_bb;
5168 edge exit;
5170 ex_bb = gimple_bb (use->stmt);
5171 exit = EDGE_SUCC (ex_bb, 0);
5172 if (flow_bb_inside_loop_p (loop, exit->dest))
5173 exit = EDGE_SUCC (ex_bb, 1);
5175 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5178 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5179 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5180 calculation is performed in non-wrapping type.
5182 TODO: More generally, we could test for the situation that
5183 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5184 This would require knowing the sign of OFFSET. */
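/* For instance, if BASE is defined as s + OFFSET and the addition is done in
   a non-wrapping type, then BASE - OFFSET is simply s, so the subtraction
   cannot overflow.  */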
5186 static bool
5187 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5189 enum tree_code code;
5190 tree e1, e2;
5191 aff_tree aff_e1, aff_e2, aff_offset;
5193 if (!nowrap_type_p (TREE_TYPE (base)))
5194 return false;
5196 base = expand_simple_operations (base);
5198 if (TREE_CODE (base) == SSA_NAME)
5200 gimple *stmt = SSA_NAME_DEF_STMT (base);
5202 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5203 return false;
5205 code = gimple_assign_rhs_code (stmt);
5206 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5207 return false;
5209 e1 = gimple_assign_rhs1 (stmt);
5210 e2 = gimple_assign_rhs2 (stmt);
5212 else
5214 code = TREE_CODE (base);
5215 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5216 return false;
5217 e1 = TREE_OPERAND (base, 0);
5218 e2 = TREE_OPERAND (base, 1);
5221 /* Use affine expansion as deeper inspection to prove the equality. */
5222 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5223 &aff_e2, &data->name_expansion_cache);
5224 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5225 &aff_offset, &data->name_expansion_cache);
5226 aff_combination_scale (&aff_offset, -1);
5227 switch (code)
5229 case PLUS_EXPR:
5230 aff_combination_add (&aff_e2, &aff_offset);
5231 if (aff_combination_zero_p (&aff_e2))
5232 return true;
5234 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5235 &aff_e1, &data->name_expansion_cache);
5236 aff_combination_add (&aff_e1, &aff_offset);
5237 return aff_combination_zero_p (&aff_e1);
5239 case POINTER_PLUS_EXPR:
5240 aff_combination_add (&aff_e2, &aff_offset);
5241 return aff_combination_zero_p (&aff_e2);
5243 default:
5244 return false;
5248 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5249 comparison with CAND. NITER describes the number of iterations of
5250 the loops. If successful, the comparison in COMP_P is altered accordingly.
5252 We aim to handle the following situation:
5254 sometype *base, *p;
5255 int a, b, i;
5257 i = a;
5258 p = p_0 = base + a;
5262 bla (*p);
5263 p++;
5264 i++;
5266 while (i < b);
5268 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5269 We aim to optimize this to
5271 p = p_0 = base + a;
5274 bla (*p);
5275 p++;
5277 while (p < p_0 - a + b);
5279 This preserves the correctness, since the pointer arithmetics does not
5280 overflow. More precisely:
5282 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5283 overflow in computing it or the values of p.
5284 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5285 overflow. To prove this, we use the fact that p_0 = base + a. */
5287 static bool
5288 iv_elimination_compare_lt (struct ivopts_data *data,
5289 struct iv_cand *cand, enum tree_code *comp_p,
5290 class tree_niter_desc *niter)
5292 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5293 class aff_tree nit, tmpa, tmpb;
5294 enum tree_code comp;
5295 HOST_WIDE_INT step;
5297 /* We need to know that the candidate induction variable does not overflow.
5298 While more complex analysis may be used to prove this, for now just
5299 check that the variable appears in the original program and that it
5300 is computed in a type that guarantees no overflows. */
5301 cand_type = TREE_TYPE (cand->iv->base);
5302 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5303 return false;
5305 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5306 the calculation of the BOUND could overflow, making the comparison
5307 invalid. */
5308 if (!data->loop_single_exit_p)
5309 return false;
5311 /* We need to be able to decide whether candidate is increasing or decreasing
5312 in order to choose the right comparison operator. */
5313 if (!cst_and_fits_in_hwi (cand->iv->step))
5314 return false;
5315 step = int_cst_value (cand->iv->step);
5317 /* Check that the number of iterations matches the expected pattern:
5318 a + 1 > b ? 0 : b - a - 1. */
5319 mbz = niter->may_be_zero;
5320 if (TREE_CODE (mbz) == GT_EXPR)
5322 /* Handle a + 1 > b. */
5323 tree op0 = TREE_OPERAND (mbz, 0);
5324 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5326 a = TREE_OPERAND (op0, 0);
5327 b = TREE_OPERAND (mbz, 1);
5329 else
5330 return false;
5332 else if (TREE_CODE (mbz) == LT_EXPR)
5334 tree op1 = TREE_OPERAND (mbz, 1);
5336 /* Handle b < a + 1. */
5337 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5339 a = TREE_OPERAND (op1, 0);
5340 b = TREE_OPERAND (mbz, 0);
5342 else
5343 return false;
5345 else
5346 return false;
5348 /* Expected number of iterations is B - A - 1. Check that it matches
5349 the actual number, i.e., that B - A - NITER = 1. */
5350 tree_to_aff_combination (niter->niter, nit_type, &nit);
5351 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5352 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5353 aff_combination_scale (&nit, -1);
5354 aff_combination_scale (&tmpa, -1);
5355 aff_combination_add (&tmpb, &tmpa);
5356 aff_combination_add (&tmpb, &nit);
5357 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5358 return false;
5360 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5361 overflow. */
5362 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5363 cand->iv->step,
5364 fold_convert (TREE_TYPE (cand->iv->step), a));
5365 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5366 return false;
5368 /* Determine the new comparison operator. */
5369 comp = step < 0 ? GT_EXPR : LT_EXPR;
5370 if (*comp_p == NE_EXPR)
5371 *comp_p = comp;
5372 else if (*comp_p == EQ_EXPR)
5373 *comp_p = invert_tree_comparison (comp, false);
5374 else
5375 gcc_unreachable ();
5377 return true;
5380 /* Check whether it is possible to express the condition in USE by comparison
5381 of candidate CAND. If so, store the value compared with to BOUND, and the
5382 comparison operator to COMP. */
5384 static bool
5385 may_eliminate_iv (struct ivopts_data *data,
5386 struct iv_use *use, struct iv_cand *cand, tree *bound,
5387 enum tree_code *comp)
5389 basic_block ex_bb;
5390 edge exit;
5391 tree period;
5392 class loop *loop = data->current_loop;
5393 aff_tree bnd;
5394 class tree_niter_desc *desc = NULL;
5396 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5397 return false;
5399 /* For now works only for exits that dominate the loop latch.
5400 TODO: extend to other conditions inside loop body. */
5401 ex_bb = gimple_bb (use->stmt);
5402 if (use->stmt != last_nondebug_stmt (ex_bb)
5403 || gimple_code (use->stmt) != GIMPLE_COND
5404 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5405 return false;
5407 exit = EDGE_SUCC (ex_bb, 0);
5408 if (flow_bb_inside_loop_p (loop, exit->dest))
5409 exit = EDGE_SUCC (ex_bb, 1);
5410 if (flow_bb_inside_loop_p (loop, exit->dest))
5411 return false;
5413 desc = niter_for_exit (data, exit);
5414 if (!desc)
5415 return false;
5417 /* Determine whether we can use the variable to test the exit condition.
5418 This is the case iff the period of the induction variable is greater
5419 than the number of iterations for which the exit condition is true. */
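  /* E.g. an unsigned char candidate with step 1 has period 255 (see
     iv_period), so it can only replace the exit test of a loop known to
     iterate at most 255 times (one fewer if the use follows the
     increment).  */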
5420 period = iv_period (cand->iv);
5422 /* If the number of iterations is constant, compare against it directly. */
5423 if (TREE_CODE (desc->niter) == INTEGER_CST)
5425 /* See cand_value_at. */
5426 if (stmt_after_increment (loop, cand, use->stmt))
5428 if (!tree_int_cst_lt (desc->niter, period))
5429 return false;
5431 else
5433 if (tree_int_cst_lt (period, desc->niter))
5434 return false;
5438 /* If not, and if this is the only possible exit of the loop, see whether
5439 we can get a conservative estimate on the number of iterations of the
5440 entire loop and compare against that instead. */
5441 else
5443 widest_int period_value, max_niter;
5445 max_niter = desc->max;
5446 if (stmt_after_increment (loop, cand, use->stmt))
5447 max_niter += 1;
5448 period_value = wi::to_widest (period);
5449 if (wi::gtu_p (max_niter, period_value))
5451 /* See if we can take advantage of inferred loop bound
5452 information. */
5453 if (data->loop_single_exit_p)
5455 if (!max_loop_iterations (loop, &max_niter))
5456 return false;
5457 /* The loop bound is already adjusted by adding 1. */
5458 if (wi::gtu_p (max_niter, period_value))
5459 return false;
5461 else
5462 return false;
5466 /* For doloop IV cand, the bound would be zero. It's safe whether
5467 may_be_zero is set or not.  */
5468 if (cand->doloop_p)
5470 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5471 *comp = iv_elimination_compare (data, use);
5472 return true;
5475 cand_value_at (loop, cand, use->stmt, desc, &bnd);
5477 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5478 aff_combination_to_tree (&bnd));
5479 *comp = iv_elimination_compare (data, use);
5481 /* It is unlikely that computing the number of iterations using division
5482 would be more profitable than keeping the original induction variable. */
5483 bool cond_overflow_p;
5484 if (expression_expensive_p (*bound, &cond_overflow_p))
5485 return false;
5487 /* Sometimes it is possible to handle the situation that the number of
5488 iterations may be zero unless additional assumptions hold, by using <
5489 instead of != in the exit condition.
5491 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5492 base the exit condition on it. However, that is often too
5493 expensive. */
5494 if (!integer_zerop (desc->may_be_zero))
5495 return iv_elimination_compare_lt (data, cand, comp, desc);
5497 return true;
5500 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5501 be copied if it is used in the loop body and DATA->body_includes_call.  */
5503 static int
5504 parm_decl_cost (struct ivopts_data *data, tree bound)
5506 tree sbound = bound;
5507 STRIP_NOPS (sbound);
5509 if (TREE_CODE (sbound) == SSA_NAME
5510 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5511 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5512 && data->body_includes_call)
5513 return COSTS_N_INSNS (1);
5515 return 0;
5518 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5520 static bool
5521 determine_group_iv_cost_cond (struct ivopts_data *data,
5522 struct iv_group *group, struct iv_cand *cand)
5524 tree bound = NULL_TREE;
5525 struct iv *cmp_iv;
5526 bitmap inv_exprs = NULL;
5527 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5528 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5529 enum comp_iv_rewrite rewrite_type;
5530 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5531 tree *control_var, *bound_cst;
5532 enum tree_code comp = ERROR_MARK;
5533 struct iv_use *use = group->vuses[0];
5535 /* Extract condition operands. */
5536 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5537 &bound_cst, NULL, &cmp_iv);
5538 gcc_assert (rewrite_type != COMP_IV_NA);
5540 /* Try iv elimination. */
5541 if (rewrite_type == COMP_IV_ELIM
5542 && may_eliminate_iv (data, use, cand, &bound, &comp))
5544 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5545 if (elim_cost.cost == 0)
5546 elim_cost.cost = parm_decl_cost (data, bound);
5547 else if (TREE_CODE (bound) == INTEGER_CST)
5548 elim_cost.cost = 0;
5549 /* If we replace a loop condition 'i < n' with 'p < base + n',
5550 inv_vars_elim will have 'base' and 'n' set, which implies that both
5551 'base' and 'n' will be live during the loop. More likely,
5552 'base + n' will be loop invariant, resulting in only one live value
5553 during the loop. So in that case we clear inv_vars_elim and set
5554 inv_expr_elim instead. */
5555 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5557 inv_expr_elim = get_loop_invariant_expr (data, bound);
5558 bitmap_clear (inv_vars_elim);
5560 /* The bound is a loop invariant, so it will be only computed
5561 once. */
5562 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5565 /* When the condition is a comparison of the candidate IV against
5566 zero, prefer this IV.
5568 TODO: The constant that we're subtracting from the cost should
5569 be target-dependent. This information should be added to the
5570 target costs for each backend. */
5571 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5572 && integer_zerop (*bound_cst)
5573 && (operand_equal_p (*control_var, cand->var_after, 0)
5574 || operand_equal_p (*control_var, cand->var_before, 0)))
5575 elim_cost -= 1;
5577 express_cost = get_computation_cost (data, use, cand, false,
5578 &inv_vars_express, NULL,
5579 &inv_expr_express);
5580 if (cmp_iv != NULL)
5581 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5583 /* Count the cost of the original bound as well. */
5584 bound_cost = force_var_cost (data, *bound_cst, NULL);
5585 if (bound_cost.cost == 0)
5586 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5587 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5588 bound_cost.cost = 0;
5589 express_cost += bound_cost;
5591 /* Choose the better approach, preferring the eliminated IV. */
5592 if (elim_cost <= express_cost)
5594 cost = elim_cost;
5595 inv_vars = inv_vars_elim;
5596 inv_vars_elim = NULL;
5597 inv_expr = inv_expr_elim;
5598 /* For doloop candidate/use pair, adjust to zero cost. */
5599 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5600 cost = no_cost;
5602 else
5604 cost = express_cost;
5605 inv_vars = inv_vars_express;
5606 inv_vars_express = NULL;
5607 bound = NULL_TREE;
5608 comp = ERROR_MARK;
5609 inv_expr = inv_expr_express;
5612 if (inv_expr)
5614 inv_exprs = BITMAP_ALLOC (NULL);
5615 bitmap_set_bit (inv_exprs, inv_expr->id);
5617 set_group_iv_cost (data, group, cand, cost,
5618 inv_vars, bound, comp, inv_exprs);
5620 if (inv_vars_elim)
5621 BITMAP_FREE (inv_vars_elim);
5622 if (inv_vars_express)
5623 BITMAP_FREE (inv_vars_express);
5625 return !cost.infinite_cost_p ();
5628 /* Determines cost of computing uses in GROUP with CAND. Returns false
5629 if USE cannot be represented with CAND. */
5631 static bool
5632 determine_group_iv_cost (struct ivopts_data *data,
5633 struct iv_group *group, struct iv_cand *cand)
5635 switch (group->type)
5637 case USE_NONLINEAR_EXPR:
5638 return determine_group_iv_cost_generic (data, group, cand);
5640 case USE_REF_ADDRESS:
5641 case USE_PTR_ADDRESS:
5642 return determine_group_iv_cost_address (data, group, cand);
5644 case USE_COMPARE:
5645 return determine_group_iv_cost_cond (data, group, cand);
5647 default:
5648 gcc_unreachable ();
5652 /* Return true if get_computation_cost indicates that autoincrement is
5653 a possibility for the pair of USE and CAND, false otherwise. */
5655 static bool
5656 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5657 struct iv_cand *cand)
5659 if (!address_p (use->type))
5660 return false;
5662 bool can_autoinc = false;
5663 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5664 return can_autoinc;
5667 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5668 use that allows autoincrement, and set their AINC_USE if possible. */
5670 static void
5671 set_autoinc_for_original_candidates (struct ivopts_data *data)
5673 unsigned i, j;
5675 for (i = 0; i < data->vcands.length (); i++)
5677 struct iv_cand *cand = data->vcands[i];
5678 struct iv_use *closest_before = NULL;
5679 struct iv_use *closest_after = NULL;
5680 if (cand->pos != IP_ORIGINAL)
5681 continue;
5683 for (j = 0; j < data->vgroups.length (); j++)
5685 struct iv_group *group = data->vgroups[j];
5686 struct iv_use *use = group->vuses[0];
5687 unsigned uid = gimple_uid (use->stmt);
5689 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5690 continue;
5692 if (uid < gimple_uid (cand->incremented_at)
5693 && (closest_before == NULL
5694 || uid > gimple_uid (closest_before->stmt)))
5695 closest_before = use;
5697 if (uid > gimple_uid (cand->incremented_at)
5698 && (closest_after == NULL
5699 || uid < gimple_uid (closest_after->stmt)))
5700 closest_after = use;
5703 if (closest_before != NULL
5704 && autoinc_possible_for_pair (data, closest_before, cand))
5705 cand->ainc_use = closest_before;
5706 else if (closest_after != NULL
5707 && autoinc_possible_for_pair (data, closest_after, cand))
5708 cand->ainc_use = closest_after;
5712 /* Relate compare use with all candidates. */
5714 static void
5715 relate_compare_use_with_all_cands (struct ivopts_data *data)
5717 unsigned i, count = data->vcands.length ();
5718 for (i = 0; i < data->vgroups.length (); i++)
5720 struct iv_group *group = data->vgroups[i];
5722 if (group->type == USE_COMPARE)
5723 bitmap_set_range (group->related_cands, 0, count);
5727 /* If PREFERRED_MODE is suitable and profitable, use it to compute the
5728 doloop iv base from niter: base = niter + 1.  */
5730 static tree
5731 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5732 const widest_int &iterations_max)
5734 tree ntype = TREE_TYPE (niter);
5735 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5736 if (!pref_type)
5737 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5738 build_int_cst (ntype, 1));
5740 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5742 int prec = TYPE_PRECISION (ntype);
5743 int pref_prec = TYPE_PRECISION (pref_type);
5745 tree base;
5747 /* Check if PREFERRED_MODE is able to represent niter.  */
5748 if (pref_prec > prec
5749 || wi::ltu_p (iterations_max,
5750 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5751 UNSIGNED)))
5753 /* No wrap: it is safe to switch to the preferred type after computing niter + 1.  */
5754 if (wi::ltu_p (iterations_max,
5755 widest_int::from (wi::max_value (prec, UNSIGNED),
5756 UNSIGNED)))
5758 /* This could help to optimize the "-1 +1" pair when niter looks
5759 like "n-1": n is in the original mode.  "base = (n - 1) + 1"
5760 in PREFERRED_MODE could become base = (PREFERRED_TYPE)n.  */
5761 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5762 build_int_cst (ntype, 1));
5763 base = fold_convert (pref_type, base);
5766 /* To avoid wrap, convert niter to the preferred type before adding 1.  */
5767 else
5769 niter = fold_convert (pref_type, niter);
5770 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5771 build_int_cst (pref_type, 1));
5774 else
5775 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5776 build_int_cst (ntype, 1));
5777 return base;
5780 /* Add one doloop dedicated IV candidate:
5781 - Base is (may_be_zero ? 1 : (niter + 1)).
5782 - Step is -1. */
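/* E.g. for a loop whose niter expression evaluates to 99, this adds a
   candidate counting 100, 99, ..., 1, which a later RTL doloop pass can turn
   into a hardware count-register (decrement-and-branch) loop.  */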
5784 static void
5785 add_iv_candidate_for_doloop (struct ivopts_data *data)
5787 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5788 gcc_assert (niter_desc && niter_desc->assumptions);
5790 tree niter = niter_desc->niter;
5791 tree ntype = TREE_TYPE (niter);
5792 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5794 tree may_be_zero = niter_desc->may_be_zero;
5795 if (may_be_zero && integer_zerop (may_be_zero))
5796 may_be_zero = NULL_TREE;
5797 if (may_be_zero)
5799 if (COMPARISON_CLASS_P (may_be_zero))
5801 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5802 build_int_cst (ntype, 0),
5803 rewrite_to_non_trapping_overflow (niter));
5805 /* Don't try to obtain the iteration count expression when may_be_zero is
5806 integer_nonzerop (the iteration count is then actually one) or has any other non-comparison form.  */
5807 else
5808 return;
5811 machine_mode mode = TYPE_MODE (ntype);
5812 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5814 tree base;
5815 if (mode != pref_mode)
5817 base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5818 ntype = TREE_TYPE (base);
5820 else
5821 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5822 build_int_cst (ntype, 1));
5825 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
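/* Illustrative shape of the candidate added above (a sketch, not extra code):
   for a loop whose niter expression is n - 1, the doloop iv is roughly

     doloop_iv = may_be_zero ? 1 : n;   // base, i.e. may_be_zero ? 1 : niter + 1
     ...
     doloop_iv = doloop_iv - 1;         // step -1, once per iteration.  */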
5828 /* Finds the candidates for the induction variables. */
5830 static void
5831 find_iv_candidates (struct ivopts_data *data)
5833 /* Add commonly used ivs. */
5834 add_standard_iv_candidates (data);
5836 /* Add doloop dedicated ivs. */
5837 if (data->doloop_use_p)
5838 add_iv_candidate_for_doloop (data);
5840 /* Add old induction variables. */
5841 add_iv_candidate_for_bivs (data);
5843 /* Add induction variables derived from uses. */
5844 add_iv_candidate_for_groups (data);
5846 set_autoinc_for_original_candidates (data);
5848 /* Record the important candidates. */
5849 record_important_candidates (data);
5851 /* Relate compare iv_use with all candidates. */
5852 if (!data->consider_all_candidates)
5853 relate_compare_use_with_all_cands (data);
5855 if (dump_file && (dump_flags & TDF_DETAILS))
5857 unsigned i;
5859 fprintf (dump_file, "\n<Important Candidates>:\t");
5860 for (i = 0; i < data->vcands.length (); i++)
5861 if (data->vcands[i]->important)
5862 fprintf (dump_file, " %d,", data->vcands[i]->id);
5863 fprintf (dump_file, "\n");
5865 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5866 for (i = 0; i < data->vgroups.length (); i++)
5868 struct iv_group *group = data->vgroups[i];
5870 if (group->related_cands)
5872 fprintf (dump_file, " Group %d:\t", group->id);
5873 dump_bitmap (dump_file, group->related_cands);
5876 fprintf (dump_file, "\n");
5880 /* Determines the cost of computing each group of uses with each iv candidate. */
5882 static void
5883 determine_group_iv_costs (struct ivopts_data *data)
5885 unsigned i, j;
5886 struct iv_cand *cand;
5887 struct iv_group *group;
5888 bitmap to_clear = BITMAP_ALLOC (NULL);
5890 alloc_use_cost_map (data);
5892 for (i = 0; i < data->vgroups.length (); i++)
5894 group = data->vgroups[i];
5896 if (data->consider_all_candidates)
5898 for (j = 0; j < data->vcands.length (); j++)
5900 cand = data->vcands[j];
5901 determine_group_iv_cost (data, group, cand);
5904 else
5906 bitmap_iterator bi;
5908 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5910 cand = data->vcands[j];
5911 if (!determine_group_iv_cost (data, group, cand))
5912 bitmap_set_bit (to_clear, j);
5915 /* Remove the candidates for which the cost is infinite from
5916 the list of related candidates. */
5917 bitmap_and_compl_into (group->related_cands, to_clear);
5918 bitmap_clear (to_clear);
5922 BITMAP_FREE (to_clear);
5924 if (dump_file && (dump_flags & TDF_DETAILS))
5926 bitmap_iterator bi;
5928 /* Dump invariant variables. */
5929 fprintf (dump_file, "\n<Invariant Vars>:\n");
5930 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5932 struct version_info *info = ver_info (data, i);
5933 if (info->inv_id)
5935 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5936 print_generic_expr (dump_file, info->name, TDF_SLIM);
5937 fprintf (dump_file, "%s\n",
5938 info->has_nonlin_use ? "" : "\t(eliminable)");
5942 /* Dump invariant expressions. */
5943 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5944 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5946 for (hash_table<iv_inv_expr_hasher>::iterator it
5947 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5948 ++it)
5949 list.safe_push (*it);
5951 list.qsort (sort_iv_inv_expr_ent);
5953 for (i = 0; i < list.length (); ++i)
5955 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5956 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5957 fprintf (dump_file, "\n");
5960 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5962 for (i = 0; i < data->vgroups.length (); i++)
5964 group = data->vgroups[i];
5966 fprintf (dump_file, "Group %d:\n", i);
5967 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5968 for (j = 0; j < group->n_map_members; j++)
5970 if (!group->cost_map[j].cand
5971 || group->cost_map[j].cost.infinite_cost_p ())
5972 continue;
5974 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5975 group->cost_map[j].cand->id,
5976 group->cost_map[j].cost.cost,
5977 group->cost_map[j].cost.complexity);
5978 if (!group->cost_map[j].inv_exprs
5979 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5980 fprintf (dump_file, "NIL;\t");
5981 else
5982 bitmap_print (dump_file,
5983 group->cost_map[j].inv_exprs, "", ";\t");
5984 if (!group->cost_map[j].inv_vars
5985 || bitmap_empty_p (group->cost_map[j].inv_vars))
5986 fprintf (dump_file, "NIL;\n");
5987 else
5988 bitmap_print (dump_file,
5989 group->cost_map[j].inv_vars, "", "\n");
5992 fprintf (dump_file, "\n");
5994 fprintf (dump_file, "\n");
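/* With -fdump-tree-ivopts-details the table printed above looks roughly like
   this (values are illustrative only):

     Group 0:
       cand	cost	compl.	inv.expr.	inv.vars
       0	4	0	NIL;	NIL;
       5	8	1	NIL;	NIL;

   where non-empty inv.expr./inv.vars columns show the bitmap of invariant
   ids the pair depends on instead of NIL.  */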
5998 /* Determines cost of the candidate CAND. */
6000 static void
6001 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6003 comp_cost cost_base;
6004 int64_t cost, cost_step;
6005 tree base;
6007 gcc_assert (cand->iv != NULL);
6009 /* There are two costs associated with the candidate -- its increment
6010 and its initialization. The second is almost negligible for any loop
6011 that rolls enough, so we give it only very little weight. */
6013 base = cand->iv->base;
6014 cost_base = force_var_cost (data, base, NULL);
6015 /* It will be exceptional that the iv register happens to be initialized with
6016 the proper value at no cost. In general, there will at least be a regcopy
6017 or a const set. */
6018 if (cost_base.cost == 0)
6019 cost_base.cost = COSTS_N_INSNS (1);
6020 /* Doloop decrement should be considered as zero cost. */
6021 if (cand->doloop_p)
6022 cost_step = 0;
6023 else
6024 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6025 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6027 /* Prefer the original ivs unless we may gain something by replacing them.
6028 The reason is to make debugging simpler; so this is not relevant for
6029 artificial ivs created by other optimization passes. */
6030 if ((cand->pos != IP_ORIGINAL
6031 || !SSA_NAME_VAR (cand->var_before)
6032 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6033 /* Prefer doloop as well. */
6034 && !cand->doloop_p)
6035 cost++;
6037 /* Prefer not to insert statements into the latch unless there are some
6038 already (so that we do not create unnecessary jumps). */
6039 if (cand->pos == IP_END
6040 && empty_block_p (ip_end_pos (data->current_loop)))
6041 cost++;
6043 cand->cost = cost;
6044 cand->cost_step = cost_step;
6047 /* Determines costs of computation of the candidates. */
6049 static void
6050 determine_iv_costs (struct ivopts_data *data)
6052 unsigned i;
6054 if (dump_file && (dump_flags & TDF_DETAILS))
6056 fprintf (dump_file, "<Candidate Costs>:\n");
6057 fprintf (dump_file, " cand\tcost\n");
6060 for (i = 0; i < data->vcands.length (); i++)
6062 struct iv_cand *cand = data->vcands[i];
6064 determine_iv_cost (data, cand);
6066 if (dump_file && (dump_flags & TDF_DETAILS))
6067 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6070 if (dump_file && (dump_flags & TDF_DETAILS))
6071 fprintf (dump_file, "\n");
6074 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6075 induction variables. Note N_INVS includes both invariant variables and
6076 invariant expressions. */
6078 static unsigned
6079 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6080 unsigned n_cands)
6082 unsigned cost;
6083 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6084 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6085 bool speed = data->speed;
6087 /* If there is a call in the loop body, the call-clobbered registers
6088 are not available for loop invariants. */
6089 if (data->body_includes_call)
6090 available_regs = available_regs - target_clobbered_regs;
6092 /* If we have enough registers. */
6093 if (regs_needed + target_res_regs < available_regs)
6094 cost = n_new;
6095 /* If close to running out of registers, try to preserve them. */
6096 else if (regs_needed <= available_regs)
6097 cost = target_reg_cost [speed] * regs_needed;
6098 /* If the registers needed exceed the available ones but the number of
6099 candidates does not, penalize the extra registers using target_spill_cost. */
6100 else if (n_cands <= available_regs)
6101 cost = target_reg_cost [speed] * available_regs
6102 + target_spill_cost [speed] * (regs_needed - available_regs);
6103 /* If even the number of candidates exceeds the available registers, penalize
6104 the extra candidate registers using target_spill_cost * 2, because it is
6105 more expensive to spill an induction variable than an invariant. */
6106 else
6107 cost = target_reg_cost [speed] * available_regs
6108 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6109 + target_spill_cost [speed] * (regs_needed - n_cands);
6111 /* Finally, add the number of candidates, so that we prefer eliminating
6112 induction variables if possible. */
6113 return cost + n_cands;
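/* A worked example of the formula above, under assumed (not target-derived)
   numbers: n_old = 4 registers already used, n_invs = 2, n_cands = 3, so
   regs_needed = 9.  With available_regs = 10 and target_res_regs = 3,
   9 + 3 < 10 fails but 9 <= 10 holds, so cost = target_reg_cost * 9.
   With only available_regs = 6, n_cands = 3 <= 6 holds, so
   cost = target_reg_cost * 6 + target_spill_cost * (9 - 6).
   In both cases n_cands is added at the end, so sets with fewer ivs are
   preferred when the other costs tie.  */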
6116 /* For each size of the induction variable set determine the penalty. */
6118 static void
6119 determine_set_costs (struct ivopts_data *data)
6121 unsigned j, n;
6122 gphi *phi;
6123 gphi_iterator psi;
6124 tree op;
6125 class loop *loop = data->current_loop;
6126 bitmap_iterator bi;
6128 if (dump_file && (dump_flags & TDF_DETAILS))
6130 fprintf (dump_file, "<Global Costs>:\n");
6131 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6132 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6133 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6134 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6137 n = 0;
6138 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6140 phi = psi.phi ();
6141 op = PHI_RESULT (phi);
6143 if (virtual_operand_p (op))
6144 continue;
6146 if (get_iv (data, op))
6147 continue;
6149 if (!POINTER_TYPE_P (TREE_TYPE (op))
6150 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6151 continue;
6153 n++;
6156 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6158 struct version_info *info = ver_info (data, j);
6160 if (info->inv_id && info->has_nonlin_use)
6161 n++;
6164 data->regs_used = n;
6165 if (dump_file && (dump_flags & TDF_DETAILS))
6166 fprintf (dump_file, " regs_used %d\n", n);
6168 if (dump_file && (dump_flags & TDF_DETAILS))
6170 fprintf (dump_file, " cost for size:\n");
6171 fprintf (dump_file, " ivs\tcost\n");
6172 for (j = 0; j <= 2 * target_avail_regs; j++)
6173 fprintf (dump_file, " %d\t%d\n", j,
6174 ivopts_estimate_reg_pressure (data, 0, j));
6175 fprintf (dump_file, "\n");
6179 /* Returns true if A is a cheaper cost pair than B. */
6181 static bool
6182 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6184 if (!a)
6185 return false;
6187 if (!b)
6188 return true;
6190 if (a->cost < b->cost)
6191 return true;
6193 if (b->cost < a->cost)
6194 return false;
6196 /* In case the costs are the same, prefer the cheaper candidate. */
6197 if (a->cand->cost < b->cand->cost)
6198 return true;
6200 return false;
6203 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6204 for more expensive, equal and cheaper respectively. */
6206 static int
6207 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6209 if (cheaper_cost_pair (a, b))
6210 return -1;
6211 if (cheaper_cost_pair (b, a))
6212 return 1;
6214 return 0;
6217 /* Returns the cost pair (candidate) by which GROUP is expressed in IVS. */
6219 static class cost_pair *
6220 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6222 return ivs->cand_for_group[group->id];
6225 /* Computes the cost field of IVS structure. */
6227 static void
6228 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6230 comp_cost cost = ivs->cand_use_cost;
6232 cost += ivs->cand_cost;
6233 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6234 ivs->cost = cost;
6237 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6238 and IVS. */
6240 static void
6241 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6243 bitmap_iterator bi;
6244 unsigned iid;
6246 if (!invs)
6247 return;
6249 gcc_assert (n_inv_uses != NULL);
6250 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6252 n_inv_uses[iid]--;
6253 if (n_inv_uses[iid] == 0)
6254 ivs->n_invs--;
6258 /* Set GROUP not to be expressed by any candidate in IVS. */
6260 static void
6261 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6262 struct iv_group *group)
6264 unsigned gid = group->id, cid;
6265 class cost_pair *cp;
6267 cp = ivs->cand_for_group[gid];
6268 if (!cp)
6269 return;
6270 cid = cp->cand->id;
6272 ivs->bad_groups++;
6273 ivs->cand_for_group[gid] = NULL;
6274 ivs->n_cand_uses[cid]--;
6276 if (ivs->n_cand_uses[cid] == 0)
6278 bitmap_clear_bit (ivs->cands, cid);
6279 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6280 ivs->n_cands--;
6281 ivs->cand_cost -= cp->cand->cost;
6282 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6283 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6286 ivs->cand_use_cost -= cp->cost;
6287 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6288 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6289 iv_ca_recount_cost (data, ivs);
6292 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6293 IVS. */
6295 static void
6296 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6298 bitmap_iterator bi;
6299 unsigned iid;
6301 if (!invs)
6302 return;
6304 gcc_assert (n_inv_uses != NULL);
6305 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6307 n_inv_uses[iid]++;
6308 if (n_inv_uses[iid] == 1)
6309 ivs->n_invs++;
6313 /* Set cost pair for GROUP in set IVS to CP. */
6315 static void
6316 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6317 struct iv_group *group, class cost_pair *cp)
6319 unsigned gid = group->id, cid;
6321 if (ivs->cand_for_group[gid] == cp)
6322 return;
6324 if (ivs->cand_for_group[gid])
6325 iv_ca_set_no_cp (data, ivs, group);
6327 if (cp)
6329 cid = cp->cand->id;
6331 ivs->bad_groups--;
6332 ivs->cand_for_group[gid] = cp;
6333 ivs->n_cand_uses[cid]++;
6334 if (ivs->n_cand_uses[cid] == 1)
6336 bitmap_set_bit (ivs->cands, cid);
6337 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6338 ivs->n_cands++;
6339 ivs->cand_cost += cp->cand->cost;
6340 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6341 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6344 ivs->cand_use_cost += cp->cost;
6345 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6346 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6347 iv_ca_recount_cost (data, ivs);
6351 /* Extend set IVS by expressing GROUP by some of the candidates in it
6352 if possible. Consider all important candidates if the candidates in
6353 set IVS don't give any result. */
6355 static void
6356 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6357 struct iv_group *group)
6359 class cost_pair *best_cp = NULL, *cp;
6360 bitmap_iterator bi;
6361 unsigned i;
6362 struct iv_cand *cand;
6364 gcc_assert (ivs->upto >= group->id);
6365 ivs->upto++;
6366 ivs->bad_groups++;
6368 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6370 cand = data->vcands[i];
6371 cp = get_group_iv_cost (data, group, cand);
6372 if (cheaper_cost_pair (cp, best_cp))
6373 best_cp = cp;
6376 if (best_cp == NULL)
6378 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6380 cand = data->vcands[i];
6381 cp = get_group_iv_cost (data, group, cand);
6382 if (cheaper_cost_pair (cp, best_cp))
6383 best_cp = cp;
6387 iv_ca_set_cp (data, ivs, group, best_cp);
6390 /* Get cost for assignment IVS. */
6392 static comp_cost
6393 iv_ca_cost (class iv_ca *ivs)
6395 /* This was a conditional expression but it triggered a bug in
6396 Sun C 5.5. */
6397 if (ivs->bad_groups)
6398 return infinite_cost;
6399 else
6400 return ivs->cost;
6403 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6404 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6405 respectively. */
6407 static int
6408 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6409 struct iv_group *group, class cost_pair *old_cp,
6410 class cost_pair *new_cp)
6412 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6413 unsigned old_n_invs = ivs->n_invs;
6414 iv_ca_set_cp (data, ivs, group, new_cp);
6415 unsigned new_n_invs = ivs->n_invs;
6416 iv_ca_set_cp (data, ivs, group, old_cp);
6418 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6421 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6422 it before NEXT. */
6424 static struct iv_ca_delta *
6425 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6426 class cost_pair *new_cp, struct iv_ca_delta *next)
6428 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6430 change->group = group;
6431 change->old_cp = old_cp;
6432 change->new_cp = new_cp;
6433 change->next = next;
6435 return change;
6438 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6439 are rewritten. */
6441 static struct iv_ca_delta *
6442 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6444 struct iv_ca_delta *last;
6446 if (!l2)
6447 return l1;
6449 if (!l1)
6450 return l2;
6452 for (last = l1; last->next; last = last->next)
6453 continue;
6454 last->next = l2;
6456 return l1;
6459 /* Reverse the list of changes DELTA, forming the inverse to it. */
6461 static struct iv_ca_delta *
6462 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6464 struct iv_ca_delta *act, *next, *prev = NULL;
6466 for (act = delta; act; act = next)
6468 next = act->next;
6469 act->next = prev;
6470 prev = act;
6472 std::swap (act->old_cp, act->new_cp);
6475 return prev;
6478 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6479 reverted instead. */
6481 static void
6482 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6483 struct iv_ca_delta *delta, bool forward)
6485 class cost_pair *from, *to;
6486 struct iv_ca_delta *act;
6488 if (!forward)
6489 delta = iv_ca_delta_reverse (delta);
6491 for (act = delta; act; act = act->next)
6493 from = act->old_cp;
6494 to = act->new_cp;
6495 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6496 iv_ca_set_cp (data, ivs, act->group, to);
6499 if (!forward)
6500 iv_ca_delta_reverse (delta);
6503 /* Returns true if CAND is used in IVS. */
6505 static bool
6506 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6508 return ivs->n_cand_uses[cand->id] > 0;
6511 /* Returns number of induction variable candidates in the set IVS. */
6513 static unsigned
6514 iv_ca_n_cands (class iv_ca *ivs)
6516 return ivs->n_cands;
6519 /* Free the list of changes DELTA. */
6521 static void
6522 iv_ca_delta_free (struct iv_ca_delta **delta)
6524 struct iv_ca_delta *act, *next;
6526 for (act = *delta; act; act = next)
6528 next = act->next;
6529 free (act);
6532 *delta = NULL;
6535 /* Allocates new iv candidates assignment. */
6537 static class iv_ca *
6538 iv_ca_new (struct ivopts_data *data)
6540 class iv_ca *nw = XNEW (class iv_ca);
6542 nw->upto = 0;
6543 nw->bad_groups = 0;
6544 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6545 data->vgroups.length ());
6546 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6547 nw->cands = BITMAP_ALLOC (NULL);
6548 nw->n_cands = 0;
6549 nw->n_invs = 0;
6550 nw->cand_use_cost = no_cost;
6551 nw->cand_cost = 0;
6552 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6553 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6554 nw->cost = no_cost;
6556 return nw;
6559 /* Free memory occupied by the set IVS. */
6561 static void
6562 iv_ca_free (class iv_ca **ivs)
6564 free ((*ivs)->cand_for_group);
6565 free ((*ivs)->n_cand_uses);
6566 BITMAP_FREE ((*ivs)->cands);
6567 free ((*ivs)->n_inv_var_uses);
6568 free ((*ivs)->n_inv_expr_uses);
6569 free (*ivs);
6570 *ivs = NULL;
6573 /* Dumps IVS to FILE. */
6575 static void
6576 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6578 unsigned i;
6579 comp_cost cost = iv_ca_cost (ivs);
6581 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6582 cost.complexity);
6583 fprintf (file, " reg_cost: %d\n",
6584 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6585 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6586 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6587 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6588 bitmap_print (file, ivs->cands, " candidates: ","\n");
6590 for (i = 0; i < ivs->upto; i++)
6592 struct iv_group *group = data->vgroups[i];
6593 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6594 if (cp)
6595 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6596 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6597 cp->cost.cost, cp->cost.complexity);
6598 else
6599 fprintf (file, " group:%d --> ??\n", group->id);
6602 const char *pref = "";
6603 fprintf (file, " invariant variables: ");
6604 for (i = 1; i <= data->max_inv_var_id; i++)
6605 if (ivs->n_inv_var_uses[i])
6607 fprintf (file, "%s%d", pref, i);
6608 pref = ", ";
6611 pref = "";
6612 fprintf (file, "\n invariant expressions: ");
6613 for (i = 1; i <= data->max_inv_expr_id; i++)
6614 if (ivs->n_inv_expr_uses[i])
6616 fprintf (file, "%s%d", pref, i);
6617 pref = ", ";
6620 fprintf (file, "\n\n");
6623 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6624 the new set, and store the differences in DELTA. The number of induction
6625 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it
6626 is true the function tries to find a solution with minimal iv candidates. */
6628 static comp_cost
6629 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6630 struct iv_cand *cand, struct iv_ca_delta **delta,
6631 unsigned *n_ivs, bool min_ncand)
6633 unsigned i;
6634 comp_cost cost;
6635 struct iv_group *group;
6636 class cost_pair *old_cp, *new_cp;
6638 *delta = NULL;
6639 for (i = 0; i < ivs->upto; i++)
6641 group = data->vgroups[i];
6642 old_cp = iv_ca_cand_for_group (ivs, group);
6644 if (old_cp
6645 && old_cp->cand == cand)
6646 continue;
6648 new_cp = get_group_iv_cost (data, group, cand);
6649 if (!new_cp)
6650 continue;
6652 if (!min_ncand)
6654 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6655 /* Skip if new_cp depends on more invariants. */
6656 if (cmp_invs > 0)
6657 continue;
6659 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6660 /* Skip if new_cp is not cheaper. */
6661 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6662 continue;
6665 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6668 iv_ca_delta_commit (data, ivs, *delta, true);
6669 cost = iv_ca_cost (ivs);
6670 if (n_ivs)
6671 *n_ivs = iv_ca_n_cands (ivs);
6672 iv_ca_delta_commit (data, ivs, *delta, false);
6674 return cost;
6677 /* Try narrowing set IVS by removing CAND. Return the cost of
6678 the new set and store the differences in DELTA. START is
6679 the candidate with which we start narrowing. */
6681 static comp_cost
6682 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6683 struct iv_cand *cand, struct iv_cand *start,
6684 struct iv_ca_delta **delta)
6686 unsigned i, ci;
6687 struct iv_group *group;
6688 class cost_pair *old_cp, *new_cp, *cp;
6689 bitmap_iterator bi;
6690 struct iv_cand *cnd;
6691 comp_cost cost, best_cost, acost;
6693 *delta = NULL;
6694 for (i = 0; i < data->vgroups.length (); i++)
6696 group = data->vgroups[i];
6698 old_cp = iv_ca_cand_for_group (ivs, group);
6699 if (old_cp->cand != cand)
6700 continue;
6702 best_cost = iv_ca_cost (ivs);
6703 /* Start narrowing with START. */
6704 new_cp = get_group_iv_cost (data, group, start);
6706 if (data->consider_all_candidates)
6708 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6710 if (ci == cand->id || (start && ci == start->id))
6711 continue;
6713 cnd = data->vcands[ci];
6715 cp = get_group_iv_cost (data, group, cnd);
6716 if (!cp)
6717 continue;
6719 iv_ca_set_cp (data, ivs, group, cp);
6720 acost = iv_ca_cost (ivs);
6722 if (acost < best_cost)
6724 best_cost = acost;
6725 new_cp = cp;
6729 else
6731 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6733 if (ci == cand->id || (start && ci == start->id))
6734 continue;
6736 cnd = data->vcands[ci];
6738 cp = get_group_iv_cost (data, group, cnd);
6739 if (!cp)
6740 continue;
6742 iv_ca_set_cp (data, ivs, group, cp);
6743 acost = iv_ca_cost (ivs);
6745 if (acost < best_cost)
6747 best_cost = acost;
6748 new_cp = cp;
6752 /* Restore to old cp for use. */
6753 iv_ca_set_cp (data, ivs, group, old_cp);
6755 if (!new_cp)
6757 iv_ca_delta_free (delta);
6758 return infinite_cost;
6761 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6764 iv_ca_delta_commit (data, ivs, *delta, true);
6765 cost = iv_ca_cost (ivs);
6766 iv_ca_delta_commit (data, ivs, *delta, false);
6768 return cost;
6771 /* Try optimizing the set of candidates IVS by removing candidates other
6772 than EXCEPT_CAND from it. Return the cost of the new set, and store
6773 differences in DELTA. */
6775 static comp_cost
6776 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6777 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6779 bitmap_iterator bi;
6780 struct iv_ca_delta *act_delta, *best_delta;
6781 unsigned i;
6782 comp_cost best_cost, acost;
6783 struct iv_cand *cand;
6785 best_delta = NULL;
6786 best_cost = iv_ca_cost (ivs);
6788 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6790 cand = data->vcands[i];
6792 if (cand == except_cand)
6793 continue;
6795 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6797 if (acost < best_cost)
6799 best_cost = acost;
6800 iv_ca_delta_free (&best_delta);
6801 best_delta = act_delta;
6803 else
6804 iv_ca_delta_free (&act_delta);
6807 if (!best_delta)
6809 *delta = NULL;
6810 return best_cost;
6813 /* Recurse to possibly remove other unnecessary ivs. */
6814 iv_ca_delta_commit (data, ivs, best_delta, true);
6815 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6816 iv_ca_delta_commit (data, ivs, best_delta, false);
6817 *delta = iv_ca_delta_join (best_delta, *delta);
6818 return best_cost;
6821 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6822 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6823 the corresponding cost_pair; otherwise just return BEST_CP. */
6825 static class cost_pair*
6826 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6827 unsigned int cand_idx, struct iv_cand *old_cand,
6828 class cost_pair *best_cp)
6830 struct iv_cand *cand;
6831 class cost_pair *cp;
6833 gcc_assert (old_cand != NULL && best_cp != NULL);
6834 if (cand_idx == old_cand->id)
6835 return best_cp;
6837 cand = data->vcands[cand_idx];
6838 cp = get_group_iv_cost (data, group, cand);
6839 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6840 return cp;
6842 return best_cp;
6845 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6846 that are used by more than one iv use. For each of those candidates,
6847 this function tries to represent the iv uses under that candidate using
6848 other candidates with lower local cost, then tries to prune the new set.
6849 If the new set has a lower cost, it returns the new cost after recording the
6850 candidate replacement in list DELTA. */
6852 static comp_cost
6853 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6854 struct iv_ca_delta **delta)
6856 bitmap_iterator bi, bj;
6857 unsigned int i, j, k;
6858 struct iv_cand *cand;
6859 comp_cost orig_cost, acost;
6860 struct iv_ca_delta *act_delta, *tmp_delta;
6861 class cost_pair *old_cp, *best_cp = NULL;
6863 *delta = NULL;
6864 orig_cost = iv_ca_cost (ivs);
6866 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6868 if (ivs->n_cand_uses[i] == 1
6869 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6870 continue;
6872 cand = data->vcands[i];
6874 act_delta = NULL;
6875 /* Represent uses under current candidate using other ones with
6876 lower local cost. */
6877 for (j = 0; j < ivs->upto; j++)
6879 struct iv_group *group = data->vgroups[j];
6880 old_cp = iv_ca_cand_for_group (ivs, group);
6882 if (old_cp->cand != cand)
6883 continue;
6885 best_cp = old_cp;
6886 if (data->consider_all_candidates)
6887 for (k = 0; k < data->vcands.length (); k++)
6888 best_cp = cheaper_cost_with_cand (data, group, k,
6889 old_cp->cand, best_cp);
6890 else
6891 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6892 best_cp = cheaper_cost_with_cand (data, group, k,
6893 old_cp->cand, best_cp);
6895 if (best_cp == old_cp)
6896 continue;
6898 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6900 /* No need for further pruning. */
6901 if (!act_delta)
6902 continue;
6904 /* Prune the new candidate set. */
6905 iv_ca_delta_commit (data, ivs, act_delta, true);
6906 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6907 iv_ca_delta_commit (data, ivs, act_delta, false);
6908 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6910 if (acost < orig_cost)
6912 *delta = act_delta;
6913 return acost;
6915 else
6916 iv_ca_delta_free (&act_delta);
6919 return orig_cost;
6922 /* Tries to extend the set IVS in the best possible way in order to
6923 express GROUP. If ORIGINALP is true, prefer candidates from
6924 the original set of IVs, otherwise favor important candidates not
6925 based on any memory object. */
6927 static bool
6928 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6929 struct iv_group *group, bool originalp)
6931 comp_cost best_cost, act_cost;
6932 unsigned i;
6933 bitmap_iterator bi;
6934 struct iv_cand *cand;
6935 struct iv_ca_delta *best_delta = NULL, *act_delta;
6936 class cost_pair *cp;
6938 iv_ca_add_group (data, ivs, group);
6939 best_cost = iv_ca_cost (ivs);
6940 cp = iv_ca_cand_for_group (ivs, group);
6941 if (cp)
6943 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6944 iv_ca_set_no_cp (data, ivs, group);
6947 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6948 first try important candidates not based on any memory object. Only if
6949 this fails, try the specific ones. Rationale -- in loops with many
6950 variables the best choice often is to use just one generic biv. If we
6951 added here many ivs specific to the uses, the optimization algorithm later
6952 would be likely to get stuck in a local minimum, thus causing us to create
6953 too many ivs. The approach from few ivs to more seems more likely to be
6954 successful -- starting from few ivs, replacing an expensive use by a
6955 specific iv should always be a win. */
6956 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6958 cand = data->vcands[i];
6960 if (originalp && cand->pos != IP_ORIGINAL)
6961 continue;
6963 if (!originalp && cand->iv->base_object != NULL_TREE)
6964 continue;
6966 if (iv_ca_cand_used_p (ivs, cand))
6967 continue;
6969 cp = get_group_iv_cost (data, group, cand);
6970 if (!cp)
6971 continue;
6973 iv_ca_set_cp (data, ivs, group, cp);
6974 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6975 true);
6976 iv_ca_set_no_cp (data, ivs, group);
6977 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6979 if (act_cost < best_cost)
6981 best_cost = act_cost;
6983 iv_ca_delta_free (&best_delta);
6984 best_delta = act_delta;
6986 else
6987 iv_ca_delta_free (&act_delta);
6990 if (best_cost.infinite_cost_p ())
6992 for (i = 0; i < group->n_map_members; i++)
6994 cp = group->cost_map + i;
6995 cand = cp->cand;
6996 if (!cand)
6997 continue;
6999 /* Already tried this. */
7000 if (cand->important)
7002 if (originalp && cand->pos == IP_ORIGINAL)
7003 continue;
7004 if (!originalp && cand->iv->base_object == NULL_TREE)
7005 continue;
7008 if (iv_ca_cand_used_p (ivs, cand))
7009 continue;
7011 act_delta = NULL;
7012 iv_ca_set_cp (data, ivs, group, cp);
7013 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7014 iv_ca_set_no_cp (data, ivs, group);
7015 act_delta = iv_ca_delta_add (group,
7016 iv_ca_cand_for_group (ivs, group),
7017 cp, act_delta);
7019 if (act_cost < best_cost)
7021 best_cost = act_cost;
7023 if (best_delta)
7024 iv_ca_delta_free (&best_delta);
7025 best_delta = act_delta;
7027 else
7028 iv_ca_delta_free (&act_delta);
7032 iv_ca_delta_commit (data, ivs, best_delta, true);
7033 iv_ca_delta_free (&best_delta);
7035 return !best_cost.infinite_cost_p ();
7038 /* Finds an initial assignment of candidates to uses. */
7040 static class iv_ca *
7041 get_initial_solution (struct ivopts_data *data, bool originalp)
7043 unsigned i;
7044 class iv_ca *ivs = iv_ca_new (data);
7046 for (i = 0; i < data->vgroups.length (); i++)
7047 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7049 iv_ca_free (&ivs);
7050 return NULL;
7053 return ivs;
7056 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
7057 points to a bool variable; if it is true, this function tries to break
7058 the locally optimal fixed point by replacing candidates in IVS. */
7060 static bool
7061 try_improve_iv_set (struct ivopts_data *data,
7062 class iv_ca *ivs, bool *try_replace_p)
7064 unsigned i, n_ivs;
7065 comp_cost acost, best_cost = iv_ca_cost (ivs);
7066 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7067 struct iv_cand *cand;
7069 /* Try extending the set of induction variables by one. */
7070 for (i = 0; i < data->vcands.length (); i++)
7072 cand = data->vcands[i];
7074 if (iv_ca_cand_used_p (ivs, cand))
7075 continue;
7077 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7078 if (!act_delta)
7079 continue;
7081 /* If we successfully added the candidate and the set is small enough,
7082 try optimizing it by removing other candidates. */
7083 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7085 iv_ca_delta_commit (data, ivs, act_delta, true);
7086 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7087 iv_ca_delta_commit (data, ivs, act_delta, false);
7088 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7091 if (acost < best_cost)
7093 best_cost = acost;
7094 iv_ca_delta_free (&best_delta);
7095 best_delta = act_delta;
7097 else
7098 iv_ca_delta_free (&act_delta);
7101 if (!best_delta)
7103 /* Try removing the candidates from the set instead. */
7104 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7106 if (!best_delta && *try_replace_p)
7108 *try_replace_p = false;
7109 /* So far the candidate-selection algorithm tends to choose fewer IVs
7110 so that it can handle loops that have many variables but for which
7111 the best choice is often to use only one general biv. One
7112 weakness is that it can't handle the opposite case, in which different
7113 candidates should be chosen with respect to each use. To solve
7114 this problem, we replace candidates in the manner described in the
7115 comments of iv_ca_replace, giving the general algorithm a chance
7116 to break the locally optimal fixed point in these cases. */
7117 best_cost = iv_ca_replace (data, ivs, &best_delta);
7120 if (!best_delta)
7121 return false;
7124 iv_ca_delta_commit (data, ivs, best_delta, true);
7125 iv_ca_delta_free (&best_delta);
7126 return best_cost == iv_ca_cost (ivs);
7129 /* Attempts to find the optimal set of induction variables. We use a simple
7130 greedy heuristic -- we try to replace at most one candidate in the selected
7131 solution and remove the unused ivs while this improves the cost. */
7133 static class iv_ca *
7134 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7136 class iv_ca *set;
7137 bool try_replace_p = true;
7139 /* Get the initial solution. */
7140 set = get_initial_solution (data, originalp);
7141 if (!set)
7143 if (dump_file && (dump_flags & TDF_DETAILS))
7144 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7145 return NULL;
7148 if (dump_file && (dump_flags & TDF_DETAILS))
7150 fprintf (dump_file, "Initial set of candidates:\n");
7151 iv_ca_dump (data, dump_file, set);
7154 while (try_improve_iv_set (data, set, &try_replace_p))
7156 if (dump_file && (dump_flags & TDF_DETAILS))
7158 fprintf (dump_file, "Improved to:\n");
7159 iv_ca_dump (data, dump_file, set);
7163 /* If the set has infinite_cost, it can't be optimal. */
7164 if (iv_ca_cost (set).infinite_cost_p ())
7166 if (dump_file && (dump_flags & TDF_DETAILS))
7167 fprintf (dump_file,
7168 "Overflow to infinite cost in try_improve_iv_set.\n");
7169 iv_ca_free (&set);
7171 return set;
7174 static class iv_ca *
7175 find_optimal_iv_set (struct ivopts_data *data)
7177 unsigned i;
7178 comp_cost cost, origcost;
7179 class iv_ca *set, *origset;
7181 /* Determine the cost based on a strategy that starts with original IVs,
7182 and try again using a strategy that prefers candidates not based
7183 on any memory object. */
7184 origset = find_optimal_iv_set_1 (data, true);
7185 set = find_optimal_iv_set_1 (data, false);
7187 if (!origset && !set)
7188 return NULL;
7190 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7191 cost = set ? iv_ca_cost (set) : infinite_cost;
7193 if (dump_file && (dump_flags & TDF_DETAILS))
7195 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7196 origcost.cost, origcost.complexity);
7197 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7198 cost.cost, cost.complexity);
7201 /* Choose the one with the best cost. */
7202 if (origcost <= cost)
7204 if (set)
7205 iv_ca_free (&set);
7206 set = origset;
7208 else if (origset)
7209 iv_ca_free (&origset);
7211 for (i = 0; i < data->vgroups.length (); i++)
7213 struct iv_group *group = data->vgroups[i];
7214 group->selected = iv_ca_cand_for_group (set, group)->cand;
7217 return set;
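/* A hedged pseudocode summary of the search implemented above (illustrative
   only; the real control flow lives in the functions it names):

     for originalp in { true, false }:
       set = get_initial_solution (data, originalp);   // one cost pair per group
       while (try_improve_iv_set (data, set, &try_replace_p))
         ;   // extend by one candidate, prune, or replace candidates
     return the cheaper of the two resulting sets.  */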
7220 /* Creates a new induction variable corresponding to CAND. */
7222 static void
7223 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7225 gimple_stmt_iterator incr_pos;
7226 tree base;
7227 struct iv_use *use;
7228 struct iv_group *group;
7229 bool after = false;
7231 gcc_assert (cand->iv != NULL);
7233 switch (cand->pos)
7235 case IP_NORMAL:
7236 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7237 break;
7239 case IP_END:
7240 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7241 after = true;
7242 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7244 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7245 incr_pos = gsi_after_labels (split_edge (e));
7246 after = false;
7248 break;
7250 case IP_AFTER_USE:
7251 after = true;
7252 /* fall through */
7253 case IP_BEFORE_USE:
7254 incr_pos = gsi_for_stmt (cand->incremented_at);
7255 break;
7257 case IP_ORIGINAL:
7258 /* Mark that the iv is preserved. */
7259 name_info (data, cand->var_before)->preserve_biv = true;
7260 name_info (data, cand->var_after)->preserve_biv = true;
7262 /* Rewrite the increment so that it uses var_before directly. */
7263 use = find_interesting_uses_op (data, cand->var_after);
7264 group = data->vgroups[use->group_id];
7265 group->selected = cand;
7266 return;
7269 gimple_add_tmp_var (cand->var_before);
7271 base = unshare_expr (cand->iv->base);
7273 create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7274 cand->var_before, data->current_loop,
7275 &incr_pos, after, &cand->var_before, &cand->var_after);
7278 /* Creates new induction variables described in SET. */
7280 static void
7281 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7283 unsigned i;
7284 struct iv_cand *cand;
7285 bitmap_iterator bi;
7287 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7289 cand = data->vcands[i];
7290 create_new_iv (data, cand);
7293 if (dump_file && (dump_flags & TDF_DETAILS))
7295 fprintf (dump_file, "Selected IV set for loop %d",
7296 data->current_loop->num);
7297 if (data->loop_loc != UNKNOWN_LOCATION)
7298 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7299 LOCATION_LINE (data->loop_loc));
7300 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7301 avg_loop_niter (data->current_loop));
7302 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7303 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7305 cand = data->vcands[i];
7306 dump_cand (dump_file, cand);
7308 fprintf (dump_file, "\n");
7312 /* Rewrites USE (definition of iv used in a nonlinear expression)
7313 using candidate CAND. */
7315 static void
7316 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7317 struct iv_use *use, struct iv_cand *cand)
7319 gassign *ass;
7320 gimple_stmt_iterator bsi;
7321 tree comp, type = get_use_type (use), tgt;
7323 /* An important special case -- if we are asked to express value of
7324 the original iv by itself, just exit; there is no need to
7325 introduce a new computation (that might also need casting the
7326 variable to unsigned and back). */
7327 if (cand->pos == IP_ORIGINAL
7328 && cand->incremented_at == use->stmt)
7330 tree op = NULL_TREE;
7331 enum tree_code stmt_code;
7333 gcc_assert (is_gimple_assign (use->stmt));
7334 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7336 /* Check whether we may leave the computation unchanged.
7337 This is the case only if it does not rely on other
7338 computations in the loop -- otherwise, the computation
7339 we rely upon may be removed in remove_unused_ivs,
7340 thus leading to an ICE. */
7341 stmt_code = gimple_assign_rhs_code (use->stmt);
7342 if (stmt_code == PLUS_EXPR
7343 || stmt_code == MINUS_EXPR
7344 || stmt_code == POINTER_PLUS_EXPR)
7346 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7347 op = gimple_assign_rhs2 (use->stmt);
7348 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7349 op = gimple_assign_rhs1 (use->stmt);
7352 if (op != NULL_TREE)
7354 if (expr_invariant_in_loop_p (data->current_loop, op))
7355 return;
7356 if (TREE_CODE (op) == SSA_NAME)
7358 struct iv *iv = get_iv (data, op);
7359 if (iv != NULL && integer_zerop (iv->step))
7360 return;
7365 switch (gimple_code (use->stmt))
7367 case GIMPLE_PHI:
7368 tgt = PHI_RESULT (use->stmt);
7370 /* If we should keep the biv, do not replace it. */
7371 if (name_info (data, tgt)->preserve_biv)
7372 return;
7374 bsi = gsi_after_labels (gimple_bb (use->stmt));
7375 break;
7377 case GIMPLE_ASSIGN:
7378 tgt = gimple_assign_lhs (use->stmt);
7379 bsi = gsi_for_stmt (use->stmt);
7380 break;
7382 default:
7383 gcc_unreachable ();
7386 aff_tree aff_inv, aff_var;
7387 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7388 use, cand, &aff_inv, &aff_var))
7389 gcc_unreachable ();
7391 unshare_aff_combination (&aff_inv);
7392 unshare_aff_combination (&aff_var);
7393 /* Prefer a CSE opportunity over a loop invariant by adding the offset last,
7394 so that iv_uses that differ only in their offsets can be CSEed. */
7395 poly_widest_int offset = aff_inv.offset;
7396 aff_inv.offset = 0;
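/* E.g. (illustrative): for uses like a[i + 4] and a[i + 8] the candidate
   and invariant parts are identical and only the offsets 4 and 8 differ;
   adding the offset last lets the shared part be CSEd.  */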
7398 gimple_seq stmt_list = NULL, seq = NULL;
7399 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7400 tree comp_op2 = aff_combination_to_tree (&aff_var);
7401 gcc_assert (comp_op1 && comp_op2);
7403 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7404 gimple_seq_add_seq (&stmt_list, seq);
7405 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7406 gimple_seq_add_seq (&stmt_list, seq);
7408 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7409 std::swap (comp_op1, comp_op2);
7411 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7413 comp = fold_build_pointer_plus (comp_op1,
7414 fold_convert (sizetype, comp_op2));
7415 comp = fold_build_pointer_plus (comp,
7416 wide_int_to_tree (sizetype, offset));
7418 else
7420 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7421 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7422 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7423 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7426 comp = fold_convert (type, comp);
7427 comp = force_gimple_operand (comp, &seq, false, NULL);
7428 gimple_seq_add_seq (&stmt_list, seq);
7429 if (gimple_code (use->stmt) != GIMPLE_PHI
7430 /* We can't allow re-allocating the stmt as it might be pointed
7431 to still. */
7432 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7433 >= gimple_num_ops (gsi_stmt (bsi))))
7435 comp = force_gimple_operand (comp, &seq, true, NULL);
7436 gimple_seq_add_seq (&stmt_list, seq);
7437 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7439 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7440 /* As this isn't a plain copy we have to reset alignment
7441 information. */
7442 if (SSA_NAME_PTR_INFO (comp))
7443 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7447 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7448 if (gimple_code (use->stmt) == GIMPLE_PHI)
7450 ass = gimple_build_assign (tgt, comp);
7451 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7453 bsi = gsi_for_stmt (use->stmt);
7454 remove_phi_node (&bsi, false);
7456 else
7458 gimple_assign_set_rhs_from_tree (&bsi, comp);
7459 use->stmt = gsi_stmt (bsi);
7463 /* Performs a peephole optimization to reorder the iv update statement with
7464 a mem ref to enable instruction combining in later phases. The mem ref uses
7465 the iv value before the update, so the reordering transformation requires
7466 adjustment of the offset. CAND is the selected IV_CAND.
7468 Example:
7470 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7471 iv2 = iv1 + 1;
7473 if (t < val) (1)
7474 goto L;
7475 goto Head;
7478 directly propagating t over to (1) would introduce an overlapping live range
7479 and thus increase register pressure. This peephole transforms it into:
7482 iv2 = iv1 + 1;
7483 t = MEM_REF (base, iv2, 8, 8);
7484 if (t < val)
7485 goto L;
7486 goto Head;
7489 static void
7490 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7492 tree var_after;
7493 gimple *iv_update, *stmt;
7494 basic_block bb;
7495 gimple_stmt_iterator gsi, gsi_iv;
7497 if (cand->pos != IP_NORMAL)
7498 return;
7500 var_after = cand->var_after;
7501 iv_update = SSA_NAME_DEF_STMT (var_after);
7503 bb = gimple_bb (iv_update);
7504 gsi = gsi_last_nondebug_bb (bb);
7505 stmt = gsi_stmt (gsi);
7507 /* Only handle conditional statement for now. */
7508 if (gimple_code (stmt) != GIMPLE_COND)
7509 return;
7511 gsi_prev_nondebug (&gsi);
7512 stmt = gsi_stmt (gsi);
7513 if (stmt != iv_update)
7514 return;
7516 gsi_prev_nondebug (&gsi);
7517 if (gsi_end_p (gsi))
7518 return;
7520 stmt = gsi_stmt (gsi);
7521 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7522 return;
7524 if (stmt != use->stmt)
7525 return;
7527 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7528 return;
7530 if (dump_file && (dump_flags & TDF_DETAILS))
7532 fprintf (dump_file, "Reordering \n");
7533 print_gimple_stmt (dump_file, iv_update, 0);
7534 print_gimple_stmt (dump_file, use->stmt, 0);
7535 fprintf (dump_file, "\n");
7538 gsi = gsi_for_stmt (use->stmt);
7539 gsi_iv = gsi_for_stmt (iv_update);
7540 gsi_move_before (&gsi_iv, &gsi);
7542 cand->pos = IP_BEFORE_USE;
7543 cand->incremented_at = use->stmt;
7546 /* Return the alias pointer type that should be used for a MEM_REF
7547 associated with USE, which has type USE_PTR_ADDRESS. */
7549 static tree
7550 get_alias_ptr_type_for_ptr_address (iv_use *use)
7552 gcall *call = as_a <gcall *> (use->stmt);
7553 switch (gimple_call_internal_fn (call))
7555 case IFN_MASK_LOAD:
7556 case IFN_MASK_STORE:
7557 case IFN_MASK_LOAD_LANES:
7558 case IFN_MASK_STORE_LANES:
7559 case IFN_MASK_LEN_LOAD_LANES:
7560 case IFN_MASK_LEN_STORE_LANES:
7561 case IFN_LEN_LOAD:
7562 case IFN_LEN_STORE:
7563 case IFN_MASK_LEN_LOAD:
7564 case IFN_MASK_LEN_STORE:
7565 /* The second argument contains the correct alias type. */
7566 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7567 return TREE_TYPE (gimple_call_arg (call, 1));
7569 default:
7570 gcc_unreachable ();
7575 /* Rewrites USE (address that is an iv) using candidate CAND. */
7577 static void
7578 rewrite_use_address (struct ivopts_data *data,
7579 struct iv_use *use, struct iv_cand *cand)
7581 aff_tree aff;
7582 bool ok;
7584 adjust_iv_update_pos (cand, use);
7585 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7586 gcc_assert (ok);
7587 unshare_aff_combination (&aff);
7589 /* To avoid undefined overflow problems, all IV candidates use unsigned
7590 integer types. The drawback is that this makes it impossible for
7591 create_mem_ref to distinguish an IV that is based on a memory object
7592 from one that represents simply an offset.
7594 To work around this problem, we pass a hint to create_mem_ref that
7595 indicates which variable (if any) in aff is an IV based on a memory
7596 object. Note that we only consider the candidate. If this is not
7597 based on an object, the base of the reference is in some subexpression
7598 of the use -- but these will use pointer types, so they are recognized
7599 by the create_mem_ref heuristics anyway. */
7600 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7601 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7602 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7603 tree type = use->mem_type;
7604 tree alias_ptr_type;
7605 if (use->type == USE_PTR_ADDRESS)
7606 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7607 else
7609 gcc_assert (type == TREE_TYPE (*use->op_p));
7610 unsigned int align = get_object_alignment (*use->op_p);
7611 if (align != TYPE_ALIGN (type))
7612 type = build_aligned_type (type, align);
7613 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7615 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7616 iv, base_hint, data->speed);
7618 if (use->type == USE_PTR_ADDRESS)
7620 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7621 ref = fold_convert (get_use_type (use), ref);
7622 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7623 true, GSI_SAME_STMT);
7625 else
7627 /* When we end up confused enough to have no suitable base but have
7628 stuffed everything into index2, use a LEA for the address and
7629 create a plain MEM_REF to avoid basing a memory reference
7630 on address zero, which create_mem_ref_raw does as a fallback. */
7631 if (TREE_CODE (ref) == TARGET_MEM_REF
7632 && TMR_INDEX2 (ref) != NULL_TREE
7633 && integer_zerop (TREE_OPERAND (ref, 0)))
7635 ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7636 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7637 true, GSI_SAME_STMT);
7638 ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7640 copy_ref_info (ref, *use->op_p);
7643 *use->op_p = ref;
7646 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7647 candidate CAND. */
7649 static void
7650 rewrite_use_compare (struct ivopts_data *data,
7651 struct iv_use *use, struct iv_cand *cand)
7653 tree comp, op, bound;
7654 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7655 enum tree_code compare;
7656 struct iv_group *group = data->vgroups[use->group_id];
7657 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7659 bound = cp->value;
7660 if (bound)
7662 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7663 tree var_type = TREE_TYPE (var);
7664 gimple_seq stmts;
7666 if (dump_file && (dump_flags & TDF_DETAILS))
7668 fprintf (dump_file, "Replacing exit test: ");
7669 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7671 compare = cp->comp;
7672 bound = unshare_expr (fold_convert (var_type, bound));
7673 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7674 if (stmts)
7675 gsi_insert_seq_on_edge_immediate (
7676 loop_preheader_edge (data->current_loop),
7677 stmts);
7679 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7680 gimple_cond_set_lhs (cond_stmt, var);
7681 gimple_cond_set_code (cond_stmt, compare);
7682 gimple_cond_set_rhs (cond_stmt, op);
7683 return;
7686 /* The induction variable elimination failed; just express the original
7687 giv. */
7688 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7689 gcc_assert (comp != NULL_TREE);
7690 gcc_assert (use->op_p != NULL);
7691 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7692 SSA_NAME_VAR (*use->op_p),
7693 true, GSI_SAME_STMT);
7696 /* Rewrite the groups using the selected induction variables. */
7698 static void
7699 rewrite_groups (struct ivopts_data *data)
7701 unsigned i, j;
7703 for (i = 0; i < data->vgroups.length (); i++)
7705 struct iv_group *group = data->vgroups[i];
7706 struct iv_cand *cand = group->selected;
7708 gcc_assert (cand);
7710 if (group->type == USE_NONLINEAR_EXPR)
7712 for (j = 0; j < group->vuses.length (); j++)
7714 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7715 update_stmt (group->vuses[j]->stmt);
7718 else if (address_p (group->type))
7720 for (j = 0; j < group->vuses.length (); j++)
7722 rewrite_use_address (data, group->vuses[j], cand);
7723 update_stmt (group->vuses[j]->stmt);
7726 else
7728 gcc_assert (group->type == USE_COMPARE);
7730 for (j = 0; j < group->vuses.length (); j++)
7732 rewrite_use_compare (data, group->vuses[j], cand);
7733 update_stmt (group->vuses[j]->stmt);
7739 /* Removes the ivs that are not used after rewriting. */
7741 static void
7742 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7744 unsigned j;
7745 bitmap_iterator bi;
7747 /* Figure out an order in which to release SSA DEFs so that we don't
7748 release something that we'd have to propagate into a debug stmt
7749 afterwards. */
7750 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7752 struct version_info *info;
7754 info = ver_info (data, j);
7755 if (info->iv
7756 && !integer_zerop (info->iv->step)
7757 && !info->inv_id
7758 && !info->iv->nonlin_use
7759 && !info->preserve_biv)
7761 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7763 tree def = info->iv->ssa_name;
7765 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7767 imm_use_iterator imm_iter;
7768 use_operand_p use_p;
7769 gimple *stmt;
7770 int count = 0;
7772 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7774 if (!gimple_debug_bind_p (stmt))
7775 continue;
7777 /* We just want to determine whether to do nothing
7778 (count == 0), to substitute the computed
7779 expression into a single use of the SSA DEF by
7780 itself (count == 1), or to use a debug temp
7781 because the SSA DEF is used multiple times or as
7782 part of a larger expression (count > 1). */
7783 count++;
7784 if (gimple_debug_bind_get_value (stmt) != def)
7785 count++;
7787 if (count > 1)
7788 break;
7791 if (!count)
7792 continue;
7794 struct iv_use dummy_use;
7795 struct iv_cand *best_cand = NULL, *cand;
7796 unsigned i, best_pref = 0, cand_pref;
7797 tree comp = NULL_TREE;
7799 memset (&dummy_use, 0, sizeof (dummy_use));
7800 dummy_use.iv = info->iv;
7801 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7803 cand = data->vgroups[i]->selected;
7804 if (cand == best_cand)
7805 continue;
7806 cand_pref = operand_equal_p (cand->iv->step,
7807 info->iv->step, 0)
7808 ? 4 : 0;
7809 cand_pref
7810 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7811 == TYPE_MODE (TREE_TYPE (info->iv->base))
7812 ? 2 : 0;
7813 cand_pref
7814 += TREE_CODE (cand->iv->base) == INTEGER_CST
7815 ? 1 : 0;
7816 if (best_cand == NULL || best_pref < cand_pref)
7818 tree this_comp
7819 = get_debug_computation_at (data->current_loop,
7820 SSA_NAME_DEF_STMT (def),
7821 &dummy_use, cand);
7822 if (this_comp)
7824 best_cand = cand;
7825 best_pref = cand_pref;
7826 comp = this_comp;
7831 if (!best_cand)
7832 continue;
7834 comp = unshare_expr (comp);
7835 if (count > 1)
7837 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7838 /* FIXME: Is setting the mode really necessary? */
7839 if (SSA_NAME_VAR (def))
7840 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7841 else
7842 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7843 gdebug *def_temp
7844 = gimple_build_debug_bind (vexpr, comp, NULL);
7845 gimple_stmt_iterator gsi;
7847 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7848 gsi = gsi_after_labels (gimple_bb
7849 (SSA_NAME_DEF_STMT (def)));
7850 else
7851 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7853 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7854 comp = vexpr;
7857 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7859 if (!gimple_debug_bind_p (stmt))
7860 continue;
7862 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7863 SET_USE (use_p, comp);
7865 update_stmt (stmt);
7872 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7873 for hash_map::traverse. */
7875 bool
7876 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7878 if (value)
7880 value->~tree_niter_desc ();
7881 free (value);
7883 return true;
7886 /* Frees data allocated by the optimization of a single loop. */
7888 static void
7889 free_loop_data (struct ivopts_data *data)
7891 unsigned i, j;
7892 bitmap_iterator bi;
7893 tree obj;
7895 if (data->niters)
7897 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7898 delete data->niters;
7899 data->niters = NULL;
7902 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7904 struct version_info *info;
7906 info = ver_info (data, i);
7907 info->iv = NULL;
7908 info->has_nonlin_use = false;
7909 info->preserve_biv = false;
7910 info->inv_id = 0;
7912 bitmap_clear (data->relevant);
7913 bitmap_clear (data->important_candidates);
7915 for (i = 0; i < data->vgroups.length (); i++)
7917 struct iv_group *group = data->vgroups[i];
7919 for (j = 0; j < group->vuses.length (); j++)
7920 free (group->vuses[j]);
7921 group->vuses.release ();
7923 BITMAP_FREE (group->related_cands);
7924 for (j = 0; j < group->n_map_members; j++)
7926 if (group->cost_map[j].inv_vars)
7927 BITMAP_FREE (group->cost_map[j].inv_vars);
7928 if (group->cost_map[j].inv_exprs)
7929 BITMAP_FREE (group->cost_map[j].inv_exprs);
7932 free (group->cost_map);
7933 free (group);
7935 data->vgroups.truncate (0);
7937 for (i = 0; i < data->vcands.length (); i++)
7939 struct iv_cand *cand = data->vcands[i];
7941 if (cand->inv_vars)
7942 BITMAP_FREE (cand->inv_vars);
7943 if (cand->inv_exprs)
7944 BITMAP_FREE (cand->inv_exprs);
7945 free (cand);
7947 data->vcands.truncate (0);
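/* SSA names may have been created while this loop was processed; if so,
   re-allocate the version_info array at twice the current number of SSA
   names (zero-initialized) so ver_info stays in bounds for the next loop.  */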
7949 if (data->version_info_size < num_ssa_names)
7951 data->version_info_size = 2 * num_ssa_names;
7952 free (data->version_info);
7953 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7956 data->max_inv_var_id = 0;
7957 data->max_inv_expr_id = 0;
7959 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7960 SET_DECL_RTL (obj, NULL_RTX);
7962 decl_rtl_to_reset.truncate (0);
7964 data->inv_expr_tab->empty ();
7966 data->iv_common_cand_tab->empty ();
7967 data->iv_common_cands.truncate (0);
7970 /* Finalizes data structures used by the iv optimization pass. */
7973 static void
7974 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7976 free_loop_data (data);
7977 free (data->version_info);
7978 BITMAP_FREE (data->relevant);
7979 BITMAP_FREE (data->important_candidates);
7981 decl_rtl_to_reset.release ();
7982 data->vgroups.release ();
7983 data->vcands.release ();
7984 delete data->inv_expr_tab;
7985 data->inv_expr_tab = NULL;
7986 free_affine_expand_cache (&data->name_expansion_cache);
7987 if (data->base_object_map)
7988 delete data->base_object_map;
7989 delete data->iv_common_cand_tab;
7990 data->iv_common_cand_tab = NULL;
7991 data->iv_common_cands.release ();
7992 obstack_free (&data->iv_obstack, NULL);
7995 /* Returns true if the loop body BODY includes any function calls, ignoring internal calls and inexpensive builtins. */
7997 static bool
7998 loop_body_includes_call (basic_block *body, unsigned num_nodes)
8000 gimple_stmt_iterator gsi;
8001 unsigned i;
8003 for (i = 0; i < num_nodes; i++)
8004 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
8006 gimple *stmt = gsi_stmt (gsi);
8007 if (is_gimple_call (stmt)
8008 && !gimple_call_internal_p (stmt)
8009 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
8010 return true;
8012 return false;
8015 /* Determine cost scaling factor for basic blocks in loop. */
8016 #define COST_SCALING_FACTOR_BOUND (20)
8018 static void
8019 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8021 int lfreq = data->current_loop->header->count.to_frequency (cfun);
8022 if (!data->speed || lfreq <= 0)
8023 return;
8025 int max_freq = lfreq;
8026 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8028 body[i]->aux = (void *)(intptr_t) 1;
8029 if (max_freq < body[i]->count.to_frequency (cfun))
8030 max_freq = body[i]->count.to_frequency (cfun);
8032 if (max_freq > lfreq)
8034 int divisor, factor;
8035 /* Check if scaling factor itself needs to be scaled by the bound. This
8036 is to avoid overflow when scaling cost according to profile info. */
8037 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8039 divisor = max_freq;
8040 factor = COST_SCALING_FACTOR_BOUND;
8042 else
8044 divisor = lfreq;
8045 factor = 1;
8047 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8049 int bfreq = body[i]->count.to_frequency (cfun);
8050 if (bfreq <= lfreq)
8051 continue;
8053 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
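/* A worked example with illustrative numbers: if the header frequency is
   10, the hottest block's frequency is 1000 and COST_SCALING_FACTOR_BOUND
   is 20, then max_freq / lfreq = 100 exceeds the bound, so divisor = 1000
   and factor = 20; a block with frequency 200 gets aux = 20 * 200 / 1000
   = 4, i.e. its cost scaling factor becomes 4 rather than the raw ratio 20.  */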
8058 /* Find the doloop comparison use and set its doloop_p flag if found. */
8060 static bool
8061 find_doloop_use (struct ivopts_data *data)
8063 struct loop *loop = data->current_loop;
8065 for (unsigned i = 0; i < data->vgroups.length (); i++)
8067 struct iv_group *group = data->vgroups[i];
8068 if (group->type == USE_COMPARE)
8070 gcc_assert (group->vuses.length () == 1);
8071 struct iv_use *use = group->vuses[0];
8072 gimple *stmt = use->stmt;
8073 if (gimple_code (stmt) == GIMPLE_COND)
8075 basic_block bb = gimple_bb (stmt);
8076 edge true_edge, false_edge;
8077 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8078 /* This comparison controls whether we branch to the loop latch. Require
8079 the latch to be empty for now. */
8080 if ((loop->latch == true_edge->dest
8081 || loop->latch == false_edge->dest)
8082 && empty_block_p (loop->latch))
8084 group->doloop_p = true;
8085 if (dump_file && (dump_flags & TDF_DETAILS))
8087 fprintf (dump_file, "Doloop cmp iv use: ");
8088 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8090 return true;
8096 return false;
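/* For example (an illustrative source-level loop, not specific to this
   function): a counted loop written as

     do
       *p++ = 0;
     while (--n);

   normally ends in a single GIMPLE_COND on the counter feeding an empty
   latch, which is the shape accepted above and the shape the RTL doloop
   pass can later turn into a low-overhead count-register loop.  */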
8099 /* For targets which support doloop, predict whether the RTL doloop
8100 transformation is likely to be performed on this loop later; if so, find
8101 the doloop compare use and mark the doloop_use_p flag. */
8103 void
8104 analyze_and_mark_doloop_use (struct ivopts_data *data)
8106 data->doloop_use_p = false;
8108 if (!flag_branch_on_count_reg)
8109 return;
8111 if (data->current_loop->unroll == USHRT_MAX)
8112 return;
8114 if (!generic_predict_doloop_p (data))
8115 return;
8117 if (find_doloop_use (data))
8119 data->doloop_use_p = true;
8120 if (dump_file && (dump_flags & TDF_DETAILS))
8122 struct loop *loop = data->current_loop;
8123 fprintf (dump_file,
8124 "Predict loop %d can perform"
8125 " doloop optimization later.\n",
8126 loop->num);
8127 flow_loop_dump (loop, dump_file, NULL, 1);
8132 /* Optimizes the LOOP. Returns true if anything changed. */
8134 static bool
8135 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8136 bitmap toremove)
8138 bool changed = false;
8139 class iv_ca *iv_ca;
8140 edge exit = single_dom_exit (loop);
8141 basic_block *body;
8143 gcc_assert (!data->niters);
8144 data->current_loop = loop;
8145 data->loop_loc = find_loop_location (loop).get_location_t ();
8146 data->speed = optimize_loop_for_speed_p (loop);
8148 if (dump_file && (dump_flags & TDF_DETAILS))
8150 fprintf (dump_file, "Processing loop %d", loop->num);
8151 if (data->loop_loc != UNKNOWN_LOCATION)
8152 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8153 LOCATION_LINE (data->loop_loc));
8154 fprintf (dump_file, "\n");
8156 if (exit)
8158 fprintf (dump_file, " single exit %d -> %d, exit condition ",
8159 exit->src->index, exit->dest->index);
8160 print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8161 0, TDF_SLIM);
8162 fprintf (dump_file, "\n");
8165 fprintf (dump_file, "\n");
8168 body = get_loop_body (loop);
8169 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8170 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8172 data->loop_single_exit_p
8173 = exit != NULL && loop_only_exit_p (loop, body, exit);
8175 /* For each ssa name determines whether it behaves as an induction variable
8176 in some loop. */
8177 if (!find_induction_variables (data, body))
8178 goto finish;
8180 /* Finds interesting uses (item 1). */
8181 find_interesting_uses (data, body);
8182 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8183 goto finish;
8185 /* Determine cost scaling factor for basic blocks in loop. */
8186 determine_scaling_factor (data, body);
8188 /* Analyze doloop possibility and mark the doloop use if predicted. */
8189 analyze_and_mark_doloop_use (data);
8191 /* Finds candidates for the induction variables (item 2). */
8192 find_iv_candidates (data);
8194 /* Calculates the costs (item 3, part 1). */
8195 determine_iv_costs (data);
8196 determine_group_iv_costs (data);
8197 determine_set_costs (data);
8199 /* Find the optimal set of induction variables (item 3, part 2). */
8200 iv_ca = find_optimal_iv_set (data);
8201 /* Cleanup basic block aux field. */
8202 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8203 body[i]->aux = NULL;
8204 if (!iv_ca)
8205 goto finish;
8206 changed = true;
8208 /* Create the new induction variables (item 4, part 1). */
8209 create_new_ivs (data, iv_ca);
8210 iv_ca_free (&iv_ca);
8212 /* Rewrite the uses (item 4, part 2). */
8213 rewrite_groups (data);
8215 /* Remove the ivs that are unused after rewriting. */
8216 remove_unused_ivs (data, toremove);
8218 finish:
8219 free (body);
8220 free_loop_data (data);
8222 return changed;
8225 /* Main entry point. Optimizes induction variables in loops. */
8227 void
8228 tree_ssa_iv_optimize (void)
8230 struct ivopts_data data;
8231 auto_bitmap toremove;
8233 tree_ssa_iv_optimize_init (&data);
8234 mark_ssa_maybe_undefs ();
8236 /* Optimize the loops starting with the innermost ones. */
8237 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8239 if (!dbg_cnt (ivopts_loop))
8240 continue;
8242 if (dump_file && (dump_flags & TDF_DETAILS))
8243 flow_loop_dump (loop, dump_file, NULL, 1);
8245 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8248 /* Remove eliminated IV defs. */
8249 release_defs_bitset (toremove);
8251 /* We have changed the structure of induction variables; it might happen
8252 that definitions in the scev database refer to some of them that were
8253 eliminated. */
8254 scev_reset_htab ();
8255 /* Likewise niter and control-IV information. */
8256 free_numbers_of_iterations_estimates (cfun);
8258 tree_ssa_iv_optimize_finalize (&data);
8261 #include "gt-tree-ssa-loop-ivopts.h"