gcc/tree-ssa-reassoc.cc
1 /* Reassociation for trees.
2 Copyright (C) 2005-2025 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57 #include "internal-fn.h"
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
63 It consists of five steps:
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_*
73 3. Optimization of the operand lists, eliminating things like a +
74 -a, a & a, etc.
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
83 5. Repropagate negates, as nothing else will clean it up ATM.
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
104 a (1), b (1), c (1), d (2), e (2)
107 We start with our merge worklist empty, and the ops list with all of
108 those on it.
110 You want to first merge all leaves of the same rank, as much as
111 possible.
113 So first build a binary op of
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
125 Then build a binary op of d + e
126 mergetmp2 = d + e
128 and put mergetmp2 on the merge worklist.
130 so merge worklist = {mergetmp, c, mergetmp2}
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
135 So we have
137 build binary op
138 mergetmp3 = mergetmp + c
140 worklist = {mergetmp2, mergetmp3}
142 mergetmp4 = mergetmp2 + mergetmp3
144 worklist = {mergetmp4}
146 because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
155 So why don't we do this?
157 Because it's expensive, and rarely will help. Most trees we are
158 reassociating have 3 or fewer ops. If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
163 mergetmp = op1 + op2
164 newstmt = mergetmp + op3
166 instead of
167 mergetmp = op2 + op3
168 newstmt = mergetmp + op1
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
172 can do.
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
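/* As a small worked example (a sketch, not an exhaustive description of
   the pass): given

     tmp1 = a + b;
     tmp2 = tmp1 - a;

   step 1 rewrites the subtraction as tmp2 = tmp1 + (-a), step 2
   linearizes the chain into the operand list {b, a, -a}, step 3
   eliminates the a + -a pair (see eliminate_plus_minus_pair below), and
   step 4 rewrites what is left so that tmp2 reduces to b.  This assumes
   a type for which reassociation is allowed at all, i.e. a wrapping
   integer type or a float type with -fassociative-math
   (see can_reassociate_type_p).  */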
180 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 point 3a in the pass header comment. */
182 static bool reassoc_insert_powi_p;
184 /* Enable biasing ranks of loop accumulators. We don't want this before
185 vectorization, since it interferes with reduction chains. */
186 static bool reassoc_bias_loop_carried_phi_ranks_p;
188 /* Statistics */
189 static struct
191 int linearized;
192 int constants_eliminated;
193 int ops_eliminated;
194 int rewritten;
195 int pows_encountered;
196 int pows_created;
197 } reassociate_stats;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static unsigned int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static int64_t *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, int64_t> *operand_rank;
215 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 biased. */
217 static auto_bitmap biased_names;
219 /* Vector of SSA_NAMEs for which, after reassociate_bb has processed
220 all basic blocks, the CFG should be adjusted: the basic block is
221 split right after that SSA_NAME's definition statement and before
222 its only use, which must be a bit ior. */
223 static vec<tree> reassoc_branch_fixups;
225 /* Forward decls. */
226 static int64_t get_rank (tree);
227 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
229 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 possibly added by gsi_remove. */
232 static bool
233 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
235 gimple *stmt = gsi_stmt (*gsi);
237 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 return gsi_remove (gsi, true);
240 gimple_stmt_iterator prev = *gsi;
241 gsi_prev (&prev);
242 unsigned uid = gimple_uid (stmt);
243 basic_block bb = gimple_bb (stmt);
244 bool ret = gsi_remove (gsi, true);
245 if (!gsi_end_p (prev))
246 gsi_next (&prev);
247 else
248 prev = gsi_start_bb (bb);
249 gimple *end_stmt = gsi_stmt (*gsi);
250 while ((stmt = gsi_stmt (prev)) != end_stmt)
252 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 gimple_set_uid (stmt, uid);
254 gsi_next (&prev);
256 return ret;
259 /* Bias amount for loop-carried phis. We want this to be larger than
260 the depth of any reassociation tree we can see, but not larger than
261 the rank difference between two blocks. */
262 #define PHI_LOOP_BIAS (1 << 15)
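/* Illustrative note: bb_rank values are spaced per basic block in units
   of 1 << 16 (compare the ">> 16" in sort_by_operand_rank and the rank
   setup in init_reassoc), so a bias of 1 << 15 is assumed to stay below
   the rank difference between two blocks while still exceeding the depth
   of any reassociation tree we expect to see.  */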
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
266 like this:
268 x_1 = phi(x_0, x_2)
269 a = x_1 | 1
270 b = a ^ 2
271 .MEM = b
272 c = b + d
273 x_2 = c + e
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
278 static bool
279 propagate_bias_p (gimple *stmt)
281 use_operand_p use;
282 imm_use_iterator use_iter;
283 gimple *single_use_stmt = NULL;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 return false;
288 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
290 gimple *current_use_stmt = USE_STMT (use);
292 if (is_gimple_assign (current_use_stmt)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
295 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 return false;
297 single_use_stmt = current_use_stmt;
301 if (single_use_stmt == NULL)
302 return false;
304 if (gimple_bb (stmt)->loop_father
305 != gimple_bb (single_use_stmt)->loop_father)
306 return false;
308 return true;
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
318 static int64_t
319 phi_rank (gimple *stmt)
321 basic_block bb = gimple_bb (stmt);
322 class loop *father = bb->loop_father;
323 tree res;
324 unsigned i;
325 use_operand_p use;
326 gimple *use_stmt;
328 if (!reassoc_bias_loop_carried_phi_ranks_p)
329 return bb_rank[bb->index];
331 /* We only care about real loops (those with a latch). */
332 if (!father->latch)
333 return bb_rank[bb->index];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb != father->header
337 || father->inner)
338 return bb_rank[bb->index];
340 /* Ignore virtual SSA_NAMEs. */
341 res = gimple_phi_result (stmt);
342 if (virtual_operand_p (res))
343 return bb_rank[bb->index];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res, &use, &use_stmt)
348 || gimple_bb (use_stmt)->loop_father != father)
349 return bb_rank[bb->index];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i = 0; i < gimple_phi_num_args (stmt); i++)
354 tree arg = gimple_phi_arg_def (stmt, i);
355 if (TREE_CODE (arg) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg))
358 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 if (gimple_bb (def_stmt)->loop_father == father)
360 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
364 /* Must be an uninteresting phi. */
365 return bb_rank[bb->index];
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
372 static int64_t
373 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
375 int64_t op_rank;
377 op_rank = get_rank (op);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op) == SSA_NAME
382 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
384 if (maybe_biased_p == NULL)
385 return rank;
386 *maybe_biased_p = true;
389 return MAX (rank, op_rank);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e)
397 int64_t *slot = operand_rank->get (e);
398 return slot ? *slot : -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
403 static inline void
404 insert_operand_rank (tree e, int64_t rank)
406 gcc_assert (rank > 0);
407 bool existed = operand_rank->put (e, rank);
408 gcc_assert (!existed);
411 /* Given an expression E, return the rank of the expression. */
413 static int64_t
414 get_rank (tree e)
416 /* SSA_NAME's have the rank of the expression they are the result of.
418 For globals and uninitialized values, the rank is 0.
419 For function arguments, use the pre-setup rank.
420 For PHI nodes, stores, asm statements, etc, we use the rank of
421 the BB.
422 For simple operations, the rank is the maximum rank of any of
423 its operands, or the bb_rank, whichever is less.
424 I make no claims that this is optimal, however, it gives good
425 results. */
427 /* We make an exception to the normal ranking system to break
428 dependences of accumulator variables in loops. Suppose we
429 have a simple one-block loop containing:
431 x_1 = phi(x_0, x_2)
432 b = a + x_1
433 c = b + d
434 x_2 = c + e
436 As shown, each iteration of the calculation into x is fully
437 dependent upon the iteration before it. We would prefer to
438 see this in the form:
440 x_1 = phi(x_0, x_2)
441 b = a + d
442 c = b + e
443 x_2 = c + x_1
445 If the loop is unrolled, the calculations of b and c from
446 different iterations can be interleaved.
448 To obtain this result during reassociation, we bias the rank
449 of the phi definition x_1 upward, when it is recognized as an
450 accumulator pattern. The artificial rank causes it to be
451 added last, providing the desired independence. */
453 if (TREE_CODE (e) == SSA_NAME)
455 ssa_op_iter iter;
456 gimple *stmt;
457 int64_t rank;
458 tree op;
460 /* If we already have a rank for this expression, use that. */
461 rank = find_operand_rank (e);
462 if (rank != -1)
463 return rank;
465 stmt = SSA_NAME_DEF_STMT (e);
466 if (gimple_code (stmt) == GIMPLE_PHI)
468 rank = phi_rank (stmt);
469 if (rank != bb_rank[gimple_bb (stmt)->index])
470 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
473 else if (!is_gimple_assign (stmt))
474 rank = bb_rank[gimple_bb (stmt)->index];
476 else
478 bool biased_p = false;
479 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
481 /* Otherwise, find the maximum rank for the operands. As an
482 exception, remove the bias from loop-carried phis when propagating
483 the rank so that dependent operations are not also biased. */
484 /* Simply walk over all SSA uses - this takes advantage of the
485 fact that non-SSA operands satisfy is_gimple_min_invariant and
486 thus have rank 0. */
487 rank = 0;
488 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
489 rank = propagate_rank (rank, op, maybe_biased_p);
491 rank += 1;
492 if (biased_p)
493 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
496 if (dump_file && (dump_flags & TDF_DETAILS))
498 fprintf (dump_file, "Rank for ");
499 print_generic_expr (dump_file, e);
500 fprintf (dump_file, " is %" PRId64 "\n", rank);
503 /* Note the rank in the hashtable so we don't recompute it. */
504 insert_operand_rank (e, rank);
505 return rank;
508 /* Constants, globals, etc., are rank 0 */
509 return 0;
513 /* We want integer ones to end up last no matter what, since they are
514 the ones we can do the most with. */
515 #define INTEGER_CONST_TYPE 1 << 4
516 #define FLOAT_ONE_CONST_TYPE 1 << 3
517 #define FLOAT_CONST_TYPE 1 << 2
518 #define OTHER_CONST_TYPE 1 << 1
520 /* Classify an invariant tree into integer, float, or other, so that
521 we can sort them to be near other constants of the same type. */
522 static inline int
523 constant_type (tree t)
525 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
526 return INTEGER_CONST_TYPE;
527 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
529 /* Sort -1.0 and 1.0 constants last: while in some cases
530 const_binop can't optimize some inexact operations, multiplication
531 by -1.0 or 1.0 can always be merged with others. */
532 if (real_onep (t) || real_minus_onep (t))
533 return FLOAT_ONE_CONST_TYPE;
534 return FLOAT_CONST_TYPE;
536 else
537 return OTHER_CONST_TYPE;
540 /* qsort comparison function to sort operand entries PA and PB by rank
541 so that the sorted array is ordered by rank in decreasing order. */
542 static int
543 sort_by_operand_rank (const void *pa, const void *pb)
545 const operand_entry *oea = *(const operand_entry *const *)pa;
546 const operand_entry *oeb = *(const operand_entry *const *)pb;
548 if (oeb->rank != oea->rank)
549 return oeb->rank > oea->rank ? 1 : -1;
551 /* It's nicer for optimize_expression if constants that are likely
552 to fold when added/multiplied/whatever are put next to each
553 other. Since all constants have rank 0, order them by type. */
554 if (oea->rank == 0)
556 if (constant_type (oeb->op) != constant_type (oea->op))
557 return constant_type (oea->op) - constant_type (oeb->op);
558 else
559 /* To make sorting result stable, we use unique IDs to determine
560 order. */
561 return oeb->id > oea->id ? 1 : -1;
564 if (TREE_CODE (oea->op) != SSA_NAME)
566 if (TREE_CODE (oeb->op) != SSA_NAME)
567 return oeb->id > oea->id ? 1 : -1;
568 else
569 return 1;
571 else if (TREE_CODE (oeb->op) != SSA_NAME)
572 return -1;
574 /* Lastly, make sure the versions that are the same go next to each
575 other. */
576 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
578 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
579 versions of removed SSA_NAMEs, so if possible prefer to sort
580 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
581 See PR60418. */
582 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
583 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
584 basic_block bba = gimple_bb (stmta);
585 basic_block bbb = gimple_bb (stmtb);
586 if (bbb != bba)
588 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
589 but the other might not be. */
590 if (!bba)
591 return 1;
592 if (!bbb)
593 return -1;
594 /* If neither is, compare bb_rank. */
595 if (bb_rank[bbb->index] != bb_rank[bba->index])
596 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
599 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
600 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
601 if (da != db)
602 return da ? 1 : -1;
604 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
607 return oeb->id > oea->id ? 1 : -1;
610 /* Add an operand entry to *OPS for the tree operand OP. */
612 static void
613 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
615 operand_entry *oe = operand_entry_pool.allocate ();
617 oe->op = op;
618 oe->rank = get_rank (op);
619 oe->id = next_operand_entry_id++;
620 oe->count = 1;
621 oe->stmt_to_insert = stmt_to_insert;
622 ops->safe_push (oe);
625 /* Add an operand entry to *OPS for the tree operand OP with repeat
626 count REPEAT. */
628 static void
629 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
630 HOST_WIDE_INT repeat)
632 operand_entry *oe = operand_entry_pool.allocate ();
634 oe->op = op;
635 oe->rank = get_rank (op);
636 oe->id = next_operand_entry_id++;
637 oe->count = repeat;
638 oe->stmt_to_insert = NULL;
639 ops->safe_push (oe);
641 reassociate_stats.pows_encountered++;
644 /* Returns true if we can associate the SSA def OP. */
646 static bool
647 can_reassociate_op_p (tree op)
649 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
650 return false;
651 /* Uninitialized variables can't participate in reassociation. */
652 if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op))
653 return false;
654 /* Make sure asm goto outputs do not participate in reassociation since
655 we have no way to find an insertion place after asm goto. */
656 if (TREE_CODE (op) == SSA_NAME
657 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
658 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
659 return false;
660 return true;
663 /* Returns true if we can reassociate operations of TYPE.
664 That is for integral or non-saturating fixed-point types, and for
665 floating point type when associative-math is enabled. */
667 static bool
668 can_reassociate_type_p (tree type)
670 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
671 || NON_SAT_FIXED_POINT_TYPE_P (type)
672 || (flag_associative_math && FLOAT_TYPE_P (type)))
673 return true;
674 return false;
677 /* Return true if STMT is a reassociable operation containing a binary
678 operation with tree code CODE, and is inside LOOP. */
680 static bool
681 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
683 basic_block bb = gimple_bb (stmt);
685 if (gimple_bb (stmt) == NULL)
686 return false;
688 if (!flow_bb_inside_loop_p (loop, bb))
689 return false;
691 if (is_gimple_assign (stmt)
692 && gimple_assign_rhs_code (stmt) == code
693 && has_single_use (gimple_assign_lhs (stmt)))
695 tree rhs1 = gimple_assign_rhs1 (stmt);
696 tree rhs2 = gimple_assign_rhs2 (stmt);
697 if (!can_reassociate_op_p (rhs1)
698 || (rhs2 && !can_reassociate_op_p (rhs2)))
699 return false;
700 return true;
703 return false;
707 /* Return true if STMT is a nop-conversion. */
709 static bool
710 gimple_nop_conversion_p (gimple *stmt)
712 if (gassign *ass = dyn_cast <gassign *> (stmt))
714 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
715 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
716 TREE_TYPE (gimple_assign_rhs1 (ass))))
717 return true;
719 return false;
722 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
723 operand of that unary operation. Otherwise, return NULL. */
725 static tree
726 get_unary_op (tree name, enum tree_code opcode)
728 gimple *stmt = SSA_NAME_DEF_STMT (name);
730 /* Look through nop conversions (sign changes). */
731 if (gimple_nop_conversion_p (stmt)
732 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
733 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
735 if (!is_gimple_assign (stmt))
736 return NULL_TREE;
738 if (gimple_assign_rhs_code (stmt) == opcode)
739 return gimple_assign_rhs1 (stmt);
740 return NULL_TREE;
743 /* Return true if OP1 and OP2 have the same value if cast to either type. */
745 static bool
746 ops_equal_values_p (tree op1, tree op2)
748 if (op1 == op2)
749 return true;
751 tree orig_op1 = op1;
752 if (TREE_CODE (op1) == SSA_NAME)
754 gimple *stmt = SSA_NAME_DEF_STMT (op1);
755 if (gimple_nop_conversion_p (stmt))
757 op1 = gimple_assign_rhs1 (stmt);
758 if (op1 == op2)
759 return true;
763 if (TREE_CODE (op2) == SSA_NAME)
765 gimple *stmt = SSA_NAME_DEF_STMT (op2);
766 if (gimple_nop_conversion_p (stmt))
768 op2 = gimple_assign_rhs1 (stmt);
769 if (op1 == op2
770 || orig_op1 == op2)
771 return true;
775 return false;
779 /* If CURR and LAST are a pair of ops that OPCODE allows us to
780 eliminate through equivalences, do so, remove them from OPS, and
781 return true. Otherwise, return false. */
783 static bool
784 eliminate_duplicate_pair (enum tree_code opcode,
785 vec<operand_entry *> *ops,
786 bool *all_done,
787 unsigned int i,
788 operand_entry *curr,
789 operand_entry *last)
792 /* If we have two of the same op, and the opcode is &, |, min, or max,
793 we can eliminate one of them.
794 If we have two of the same op, and the opcode is ^, we can
795 eliminate both of them. */
797 if (last && last->op == curr->op)
799 switch (opcode)
801 case MAX_EXPR:
802 case MIN_EXPR:
803 case BIT_IOR_EXPR:
804 case BIT_AND_EXPR:
805 if (dump_file && (dump_flags & TDF_DETAILS))
807 fprintf (dump_file, "Equivalence: ");
808 print_generic_expr (dump_file, curr->op);
809 fprintf (dump_file, " [&|minmax] ");
810 print_generic_expr (dump_file, last->op);
811 fprintf (dump_file, " -> ");
812 print_generic_stmt (dump_file, last->op);
815 ops->ordered_remove (i);
816 reassociate_stats.ops_eliminated ++;
818 return true;
820 case BIT_XOR_EXPR:
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, curr->op);
825 fprintf (dump_file, " ^ ");
826 print_generic_expr (dump_file, last->op);
827 fprintf (dump_file, " -> nothing\n");
830 reassociate_stats.ops_eliminated += 2;
832 if (ops->length () == 2)
834 ops->truncate (0);
835 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
836 *all_done = true;
838 else
840 ops->ordered_remove (i-1);
841 ops->ordered_remove (i-1);
844 return true;
846 default:
847 break;
850 return false;
853 static vec<tree> plus_negates;
855 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
856 expression, look in OPS for a corresponding positive operation to cancel
857 it out. If we find one, remove the other from OPS, replace
858 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
859 return false. */
861 static bool
862 eliminate_plus_minus_pair (enum tree_code opcode,
863 vec<operand_entry *> *ops,
864 unsigned int currindex,
865 operand_entry *curr)
867 tree negateop;
868 tree notop;
869 unsigned int i;
870 operand_entry *oe;
872 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
873 return false;
875 negateop = get_unary_op (curr->op, NEGATE_EXPR);
876 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
877 if (negateop == NULL_TREE && notop == NULL_TREE)
878 return false;
880 /* Any non-negated version will have a rank that is one less than
881 the current rank. So once we hit those ranks, if we don't find
882 one, we can stop. */
884 for (i = currindex + 1;
885 ops->iterate (i, &oe)
886 && oe->rank >= curr->rank - 1 ;
887 i++)
889 if (negateop
890 && ops_equal_values_p (oe->op, negateop))
892 if (dump_file && (dump_flags & TDF_DETAILS))
894 fprintf (dump_file, "Equivalence: ");
895 print_generic_expr (dump_file, negateop);
896 fprintf (dump_file, " + -");
897 print_generic_expr (dump_file, oe->op);
898 fprintf (dump_file, " -> 0\n");
901 ops->ordered_remove (i);
902 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
903 ops->ordered_remove (currindex);
904 reassociate_stats.ops_eliminated ++;
906 return true;
908 else if (notop
909 && ops_equal_values_p (oe->op, notop))
911 tree op_type = TREE_TYPE (oe->op);
913 if (dump_file && (dump_flags & TDF_DETAILS))
915 fprintf (dump_file, "Equivalence: ");
916 print_generic_expr (dump_file, notop);
917 fprintf (dump_file, " + ~");
918 print_generic_expr (dump_file, oe->op);
919 fprintf (dump_file, " -> -1\n");
922 ops->ordered_remove (i);
923 add_to_ops_vec (ops, build_all_ones_cst (op_type));
924 ops->ordered_remove (currindex);
925 reassociate_stats.ops_eliminated ++;
927 return true;
931 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
932 save it for later inspection in repropagate_negates(). */
933 if (negateop != NULL_TREE
934 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
935 plus_negates.safe_push (curr->op);
937 return false;
940 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
941 bitwise not expression, look in OPS for a corresponding operand to
942 cancel it out. If we find one, remove the other from OPS, replace
943 OPS[CURRINDEX] with 0, and return true. Otherwise, return
944 false. */
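/* For example: with OPS {x, ~x, y} and OPCODE BIT_AND_EXPR the whole
   chain folds to 0 (x & ~x is the absorbing element of &), and with
   BIT_IOR_EXPR it folds to -1; this is why the function below replaces
   the entire operand list with that single constant.  */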
946 static bool
947 eliminate_not_pairs (enum tree_code opcode,
948 vec<operand_entry *> *ops,
949 unsigned int currindex,
950 operand_entry *curr)
952 tree notop;
953 unsigned int i;
954 operand_entry *oe;
956 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
957 || TREE_CODE (curr->op) != SSA_NAME)
958 return false;
960 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
961 if (notop == NULL_TREE)
962 return false;
964 /* Any non-not version will have a rank that is one less than
965 the current rank. So once we hit those ranks, if we don't find
966 one, we can stop. */
968 for (i = currindex + 1;
969 ops->iterate (i, &oe)
970 && oe->rank >= curr->rank - 1;
971 i++)
973 if (oe->op == notop)
975 if (dump_file && (dump_flags & TDF_DETAILS))
977 fprintf (dump_file, "Equivalence: ");
978 print_generic_expr (dump_file, notop);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " & ~");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " | ~");
983 print_generic_expr (dump_file, oe->op);
984 if (opcode == BIT_AND_EXPR)
985 fprintf (dump_file, " -> 0\n");
986 else if (opcode == BIT_IOR_EXPR)
987 fprintf (dump_file, " -> -1\n");
990 if (opcode == BIT_AND_EXPR)
991 oe->op = build_zero_cst (TREE_TYPE (oe->op));
992 else if (opcode == BIT_IOR_EXPR)
993 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
995 reassociate_stats.ops_eliminated += ops->length () - 1;
996 ops->truncate (0);
997 ops->quick_push (oe);
998 return true;
1002 return false;
1005 /* Use constant value that may be present in OPS to try to eliminate
1006 operands. Note that this function is only really used when we've
1007 eliminated ops for other reasons, or merged constants. Across
1008 single statements, fold already does all of this, plus more. There
1009 is little point in duplicating logic, so I've only included the
1010 identities that I could ever construct testcases to trigger. */
1012 static void
1013 eliminate_using_constants (enum tree_code opcode,
1014 vec<operand_entry *> *ops)
1016 operand_entry *oelast = ops->last ();
1017 tree type = TREE_TYPE (oelast->op);
1019 if (oelast->rank == 0
1020 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1022 switch (opcode)
1024 case BIT_AND_EXPR:
1025 if (integer_zerop (oelast->op))
1027 if (ops->length () != 1)
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 fprintf (dump_file, "Found & 0, removing all other ops\n");
1032 reassociate_stats.ops_eliminated += ops->length () - 1;
1034 ops->truncate (0);
1035 ops->quick_push (oelast);
1036 return;
1039 else if (integer_all_onesp (oelast->op))
1041 if (ops->length () != 1)
1043 if (dump_file && (dump_flags & TDF_DETAILS))
1044 fprintf (dump_file, "Found & -1, removing\n");
1045 ops->pop ();
1046 reassociate_stats.ops_eliminated++;
1049 break;
1050 case BIT_IOR_EXPR:
1051 if (integer_all_onesp (oelast->op))
1053 if (ops->length () != 1)
1055 if (dump_file && (dump_flags & TDF_DETAILS))
1056 fprintf (dump_file, "Found | -1, removing all other ops\n");
1058 reassociate_stats.ops_eliminated += ops->length () - 1;
1060 ops->truncate (0);
1061 ops->quick_push (oelast);
1062 return;
1065 else if (integer_zerop (oelast->op))
1067 if (ops->length () != 1)
1069 if (dump_file && (dump_flags & TDF_DETAILS))
1070 fprintf (dump_file, "Found | 0, removing\n");
1071 ops->pop ();
1072 reassociate_stats.ops_eliminated++;
1075 break;
1076 case MULT_EXPR:
1077 if (integer_zerop (oelast->op)
1078 || (FLOAT_TYPE_P (type)
1079 && !HONOR_NANS (type)
1080 && !HONOR_SIGNED_ZEROS (type)
1081 && real_zerop (oelast->op)))
1083 if (ops->length () != 1)
1085 if (dump_file && (dump_flags & TDF_DETAILS))
1086 fprintf (dump_file, "Found * 0, removing all other ops\n");
1088 reassociate_stats.ops_eliminated += ops->length () - 1;
1089 ops->truncate (0);
1090 ops->quick_push (oelast);
1091 return;
1094 else if (integer_onep (oelast->op)
1095 || (FLOAT_TYPE_P (type)
1096 && !HONOR_SNANS (type)
1097 && real_onep (oelast->op)))
1099 if (ops->length () != 1)
1101 if (dump_file && (dump_flags & TDF_DETAILS))
1102 fprintf (dump_file, "Found * 1, removing\n");
1103 ops->pop ();
1104 reassociate_stats.ops_eliminated++;
1105 return;
1108 break;
1109 case BIT_XOR_EXPR:
1110 case PLUS_EXPR:
1111 case MINUS_EXPR:
1112 if (integer_zerop (oelast->op)
1113 || (FLOAT_TYPE_P (type)
1114 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1115 && fold_real_zero_addition_p (type, 0, oelast->op,
1116 opcode == MINUS_EXPR)))
1118 if (ops->length () != 1)
1120 if (dump_file && (dump_flags & TDF_DETAILS))
1121 fprintf (dump_file, "Found [|^+] 0, removing\n");
1122 ops->pop ();
1123 reassociate_stats.ops_eliminated++;
1124 return;
1127 break;
1128 default:
1129 break;
1135 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1136 bool, bool);
1138 /* Structure for tracking and counting operands. */
1139 struct oecount {
1140 unsigned int cnt;
1141 unsigned int id;
1142 enum tree_code oecode;
1143 tree op;
1147 /* The heap for the oecount hashtable and the sorted list of operands. */
1148 static vec<oecount> cvec;
1151 /* Oecount hashtable helpers. */
1153 struct oecount_hasher : int_hash <int, 0, 1>
1155 static inline hashval_t hash (int);
1156 static inline bool equal (int, int);
1159 /* Hash function for oecount. */
1161 inline hashval_t
1162 oecount_hasher::hash (int p)
1164 const oecount *c = &cvec[p - 42];
1165 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1168 /* Comparison function for oecount. */
1170 inline bool
1171 oecount_hasher::equal (int p1, int p2)
1173 const oecount *c1 = &cvec[p1 - 42];
1174 const oecount *c2 = &cvec[p2 - 42];
1175 return c1->oecode == c2->oecode && c1->op == c2->op;
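/* Note on the encoding used above: the ints stored in the hash table are
   positions in CVEC biased by 42 (undistribute_ops_list pushes an element
   and then uses idx = cvec.length () + 41, and both hash and equal index
   with "- 42").  The bias keeps the stored values away from 0 and 1,
   which int_hash <int, 0, 1> reserves for empty and deleted slots.  */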
1178 /* Comparison function for qsort sorting oecount elements by count. */
1180 static int
1181 oecount_cmp (const void *p1, const void *p2)
1183 const oecount *c1 = (const oecount *)p1;
1184 const oecount *c2 = (const oecount *)p2;
1185 if (c1->cnt != c2->cnt)
1186 return c1->cnt > c2->cnt ? 1 : -1;
1187 else
1188 /* If counts are identical, use unique IDs to stabilize qsort. */
1189 return c1->id > c2->id ? 1 : -1;
1192 /* Return TRUE iff STMT represents a builtin call that raises OP
1193 to some exponent. */
1195 static bool
1196 stmt_is_power_of_op (gimple *stmt, tree op)
1198 if (!is_gimple_call (stmt))
1199 return false;
1201 switch (gimple_call_combined_fn (stmt))
1203 CASE_CFN_POW:
1204 CASE_CFN_POWI:
1205 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1207 default:
1208 return false;
1212 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1213 in place and return the result. Assumes that stmt_is_power_of_op
1214 was previously called for STMT and returned TRUE. */
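/* For example, for __builtin_pow (x, 3.0) or __builtin_powi (x, 3) the
   call is adjusted in place to use exponent 2 and the function returns 2.  */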
1216 static HOST_WIDE_INT
1217 decrement_power (gimple *stmt)
1219 REAL_VALUE_TYPE c, cint;
1220 HOST_WIDE_INT power;
1221 tree arg1;
1223 switch (gimple_call_combined_fn (stmt))
1225 CASE_CFN_POW:
1226 arg1 = gimple_call_arg (stmt, 1);
1227 c = TREE_REAL_CST (arg1);
1228 power = real_to_integer (&c) - 1;
1229 real_from_integer (&cint, VOIDmode, power, SIGNED);
1230 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1231 return power;
1233 CASE_CFN_POWI:
1234 arg1 = gimple_call_arg (stmt, 1);
1235 power = TREE_INT_CST_LOW (arg1) - 1;
1236 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1237 return power;
1239 default:
1240 gcc_unreachable ();
1244 /* Replace the SSA name defined by STMT with a new SSA name and replace
1245 all uses of the old one with the new one. Return the new SSA name. */
1247 static tree
1248 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1250 gimple *use_stmt;
1251 use_operand_p use;
1252 imm_use_iterator iter;
1253 tree new_lhs, new_debug_lhs = NULL_TREE;
1254 tree lhs = gimple_get_lhs (stmt);
1256 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1257 gimple_set_lhs (stmt, new_lhs);
1259 /* Also need to update GIMPLE_DEBUGs. */
1260 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1262 tree repl = new_lhs;
1263 if (is_gimple_debug (use_stmt))
1265 if (new_debug_lhs == NULL_TREE)
1267 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1268 gdebug *def_temp
1269 = gimple_build_debug_bind (new_debug_lhs,
1270 build2 (opcode, TREE_TYPE (lhs),
1271 new_lhs, op),
1272 stmt);
1273 gimple_set_uid (def_temp, gimple_uid (stmt));
1274 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1275 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1277 repl = new_debug_lhs;
1279 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1280 SET_USE (use, repl);
1281 update_stmt (use_stmt);
1283 return new_lhs;
1286 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1287 uses with new SSA names. Also do this for the stmt that defines DEF
1288 if *DEF is not OP. */
1290 static void
1291 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1292 vec<gimple *> &stmts_to_fix)
1294 unsigned i;
1295 gimple *stmt;
1297 if (*def != op
1298 && TREE_CODE (*def) == SSA_NAME
1299 && (stmt = SSA_NAME_DEF_STMT (*def))
1300 && gimple_code (stmt) != GIMPLE_NOP)
1301 *def = make_new_ssa_for_def (stmt, opcode, op);
1303 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1304 make_new_ssa_for_def (stmt, opcode, op);
1307 /* Find the single immediate use of STMT's LHS, and replace it
1308 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1309 replace *DEF with OP as well. */
1311 static void
1312 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1314 tree lhs;
1315 gimple *use_stmt;
1316 use_operand_p use;
1317 gimple_stmt_iterator gsi;
1319 if (is_gimple_call (stmt))
1320 lhs = gimple_call_lhs (stmt);
1321 else
1322 lhs = gimple_assign_lhs (stmt);
1324 gcc_assert (has_single_use (lhs));
1325 single_imm_use (lhs, &use, &use_stmt);
1326 if (lhs == *def)
1327 *def = op;
1328 SET_USE (use, op);
1329 if (TREE_CODE (op) != SSA_NAME)
1330 update_stmt (use_stmt);
1331 gsi = gsi_for_stmt (stmt);
1332 unlink_stmt_vdef (stmt);
1333 reassoc_remove_stmt (&gsi);
1334 release_defs (stmt);
1337 /* Walks the linear chain with result *DEF searching for an operation
1338 with operand OP and code OPCODE removing that from the chain. *DEF
1339 is updated if there is only one operand but no operation left. */
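/* A small illustration (hypothetical names): with the chain

     t1 = a * b;
     t2 = t1 * c;

   and *DEF == t2, OPCODE == MULT_EXPR, OP == b, the multiplication by b
   is removed, t1's remaining operand is propagated into its single use,
   and *DEF ends up (via a fresh SSA name, see the PR72835 note below)
   as the result of a * c.  */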
1341 static void
1342 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1344 tree orig_def = *def;
1345 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1346 /* PR72835 - Record the stmt chain that has to be updated such that
1347 we don't use the same LHS when the values computed are different. */
1348 auto_vec<gimple *, 64> stmts_to_fix;
1352 tree name;
1354 if (opcode == MULT_EXPR)
1356 if (stmt_is_power_of_op (stmt, op))
1358 if (decrement_power (stmt) == 1)
1360 if (stmts_to_fix.length () > 0)
1361 stmts_to_fix.pop ();
1362 propagate_op_to_single_use (op, stmt, def);
1364 break;
1366 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1368 if (gimple_assign_rhs1 (stmt) == op)
1370 tree cst = build_minus_one_cst (TREE_TYPE (op));
1371 if (stmts_to_fix.length () > 0)
1372 stmts_to_fix.pop ();
1373 propagate_op_to_single_use (cst, stmt, def);
1374 break;
1376 else if (integer_minus_onep (op)
1377 || real_minus_onep (op))
1379 gimple_assign_set_rhs_code
1380 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1381 break;
1386 name = gimple_assign_rhs1 (stmt);
1388 /* If this is the operation we look for and one of the operands
1389 is ours simply propagate the other operand into the stmts
1390 single use. */
1391 if (gimple_assign_rhs_code (stmt) == opcode
1392 && (name == op
1393 || gimple_assign_rhs2 (stmt) == op))
1395 if (name == op)
1396 name = gimple_assign_rhs2 (stmt);
1397 if (stmts_to_fix.length () > 0)
1398 stmts_to_fix.pop ();
1399 propagate_op_to_single_use (name, stmt, def);
1400 break;
1403 /* We might have a multiply of two __builtin_pow* calls, and
1404 the operand might be hiding in the rightmost one. Likewise
1405 this can happen for a negate. */
1406 if (opcode == MULT_EXPR
1407 && gimple_assign_rhs_code (stmt) == opcode
1408 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1409 && has_single_use (gimple_assign_rhs2 (stmt)))
1411 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1412 if (stmt_is_power_of_op (stmt2, op))
1414 if (decrement_power (stmt2) == 1)
1415 propagate_op_to_single_use (op, stmt2, def);
1416 else
1417 stmts_to_fix.safe_push (stmt2);
1418 break;
1420 else if (is_gimple_assign (stmt2)
1421 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1423 if (gimple_assign_rhs1 (stmt2) == op)
1425 tree cst = build_minus_one_cst (TREE_TYPE (op));
1426 propagate_op_to_single_use (cst, stmt2, def);
1427 break;
1429 else if (integer_minus_onep (op)
1430 || real_minus_onep (op))
1432 stmts_to_fix.safe_push (stmt2);
1433 gimple_assign_set_rhs_code
1434 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1435 break;
1440 /* Continue walking the chain. */
1441 gcc_assert (name != op
1442 && TREE_CODE (name) == SSA_NAME);
1443 stmt = SSA_NAME_DEF_STMT (name);
1444 stmts_to_fix.safe_push (stmt);
1446 while (1);
1448 if (stmts_to_fix.length () > 0 || *def == orig_def)
1449 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1452 /* Returns true if statement S1 dominates statement S2. Like
1453 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1455 static bool
1456 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1458 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1460 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1461 SSA_NAME. Assume it lives at the beginning of function and
1462 thus dominates everything. */
1463 if (!bb1 || s1 == s2)
1464 return true;
1466 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1467 if (!bb2)
1468 return false;
1470 if (bb1 == bb2)
1472 /* PHIs in the same basic block are assumed to be
1473 executed all in parallel, if only one stmt is a PHI,
1474 it dominates the other stmt in the same basic block. */
1475 if (gimple_code (s1) == GIMPLE_PHI)
1476 return true;
1478 if (gimple_code (s2) == GIMPLE_PHI)
1479 return false;
1481 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1483 if (gimple_uid (s1) < gimple_uid (s2))
1484 return true;
1486 if (gimple_uid (s1) > gimple_uid (s2))
1487 return false;
1489 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1490 unsigned int uid = gimple_uid (s1);
1491 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1493 gimple *s = gsi_stmt (gsi);
1494 if (gimple_uid (s) != uid)
1495 break;
1496 if (s == s2)
1497 return true;
1500 return false;
1503 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1506 /* Insert STMT after INSERT_POINT. */
1508 static void
1509 insert_stmt_after (gimple *stmt, gimple *insert_point)
1511 gimple_stmt_iterator gsi;
1512 basic_block bb;
1514 if (gimple_code (insert_point) == GIMPLE_PHI)
1515 bb = gimple_bb (insert_point);
1516 else if (!stmt_ends_bb_p (insert_point))
1518 gsi = gsi_for_stmt (insert_point);
1519 gimple_set_uid (stmt, gimple_uid (insert_point));
1520 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1521 return;
1523 else if (gimple_code (insert_point) == GIMPLE_ASM
1524 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1525 /* We have no idea where to insert - it depends on where the
1526 uses will be placed. */
1527 gcc_unreachable ();
1528 else
1529 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1530 thus if it must end a basic block, it should be a call that can
1531 throw, or some assignment that can throw. If it throws, the LHS
1532 of it will not be initialized though, so only valid places using
1533 the SSA_NAME should be dominated by the fallthru edge. */
1534 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1535 gsi = gsi_after_labels (bb);
1536 if (gsi_end_p (gsi))
1538 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1539 gimple_set_uid (stmt,
1540 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1542 else
1543 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1544 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1547 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1548 the result. Places the statement after the definition of either
1549 OP1 or OP2. Returns the new statement. */
1551 static gimple *
1552 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1554 gimple *op1def = NULL, *op2def = NULL;
1555 gimple_stmt_iterator gsi;
1556 tree op;
1557 gassign *sum;
1559 /* Create the addition statement. */
1560 op = make_ssa_name (type);
1561 sum = gimple_build_assign (op, opcode, op1, op2);
1563 /* Find an insertion place and insert. */
1564 if (TREE_CODE (op1) == SSA_NAME)
1565 op1def = SSA_NAME_DEF_STMT (op1);
1566 if (TREE_CODE (op2) == SSA_NAME)
1567 op2def = SSA_NAME_DEF_STMT (op2);
1568 if ((!op1def || gimple_nop_p (op1def))
1569 && (!op2def || gimple_nop_p (op2def)))
1571 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1572 if (!gsi_end_p (gsi)
1573 && is_gimple_call (gsi_stmt (gsi))
1574 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1576 /* Don't add statements before a returns_twice call at the start
1577 of a function. */
1578 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1579 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1581 if (gsi_end_p (gsi))
1583 gimple_stmt_iterator gsi2
1584 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1585 gimple_set_uid (sum,
1586 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1588 else
1589 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1590 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1592 else
1594 gimple *insert_point;
1595 if ((!op1def || gimple_nop_p (op1def))
1596 || (op2def && !gimple_nop_p (op2def)
1597 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1598 insert_point = op2def;
1599 else
1600 insert_point = op1def;
1601 insert_stmt_after (sum, insert_point);
1603 update_stmt (sum);
1605 return sum;
1608 /* Perform un-distribution of divisions and multiplications.
1609 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1610 to (A + B) / X for real X.
1612 The algorithm is organized as follows.
1614 - First we walk the addition chain *OPS looking for summands that
1615 are defined by a multiplication or a real division. This results
1616 in the candidates bitmap with relevant indices into *OPS.
1618 - Second we build the chains of multiplications or divisions for
1619 these candidates, counting the number of occurrences of (operand, code)
1620 pairs in all of the candidates chains.
1622 - Third we sort the (operand, code) pairs by number of occurrence and
1623 process them starting with the pair with the most uses.
1625 * For each such pair we walk the candidates again to build a
1626 second candidate bitmap noting all multiplication/division chains
1627 that have at least one occurrence of (operand, code).
1629 * We build an alternate addition chain only covering these
1630 candidates with one (operand, code) operation removed from their
1631 multiplication/division chain.
1633 * The first candidate gets replaced by the alternate addition chain
1634 multiplied/divided by the operand.
1636 * All candidate chains get disabled for further processing and
1637 processing of (operand, code) pairs continues.
1639 The alternate addition chains built are re-processed by the main
1640 reassociation algorithm which allows optimizing a * x * y + b * y * x
1641 to (a + b) * x * y in one invocation of the reassociation pass. */
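/* For illustration (a sketch; the division case additionally requires
   -fassociative-math, since RDIV_EXPR is real division): at the source
   level the transformation corresponds roughly to

     a * x + b * x + c   ->   (a + b) * x + c
     a / x + b / x       ->   (a + b) / x

   where (x, MULT_EXPR) resp. (x, RDIV_EXPR) is the most frequently
   occurring (operand, code) pair among the candidate chains.  */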
1643 static bool
1644 undistribute_ops_list (enum tree_code opcode,
1645 vec<operand_entry *> *ops, class loop *loop)
1647 unsigned int length = ops->length ();
1648 operand_entry *oe1;
1649 unsigned i, j;
1650 unsigned nr_candidates, nr_candidates2;
1651 sbitmap_iterator sbi0;
1652 vec<operand_entry *> *subops;
1653 bool changed = false;
1654 unsigned int next_oecount_id = 0;
1656 if (length <= 1
1657 || opcode != PLUS_EXPR)
1658 return false;
1660 /* Build a list of candidates to process. */
1661 auto_sbitmap candidates (length);
1662 bitmap_clear (candidates);
1663 nr_candidates = 0;
1664 FOR_EACH_VEC_ELT (*ops, i, oe1)
1666 enum tree_code dcode;
1667 gimple *oe1def;
1669 if (TREE_CODE (oe1->op) != SSA_NAME)
1670 continue;
1671 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1672 if (!is_gimple_assign (oe1def))
1673 continue;
1674 dcode = gimple_assign_rhs_code (oe1def);
1675 if ((dcode != MULT_EXPR
1676 && dcode != RDIV_EXPR)
1677 || !is_reassociable_op (oe1def, dcode, loop))
1678 continue;
1680 bitmap_set_bit (candidates, i);
1681 nr_candidates++;
1684 if (nr_candidates < 2)
1685 return false;
1687 if (dump_file && (dump_flags & TDF_DETAILS))
1689 fprintf (dump_file, "searching for un-distribute opportunities ");
1690 print_generic_expr (dump_file,
1691 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1692 fprintf (dump_file, " %d\n", nr_candidates);
1695 /* Build linearized sub-operand lists and the counting table. */
1696 cvec.create (0);
1698 hash_table<oecount_hasher> ctable (15);
1700 /* ??? Macro arguments cannot have multi-argument template types in
1701 them. This typedef is needed to work around that limitation. */
1702 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1703 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1704 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1706 gimple *oedef;
1707 enum tree_code oecode;
1708 unsigned j;
1710 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1711 oecode = gimple_assign_rhs_code (oedef);
1712 linearize_expr_tree (&subops[i], oedef,
1713 associative_tree_code (oecode), false);
1715 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1717 oecount c;
1718 int *slot;
1719 int idx;
1720 c.oecode = oecode;
1721 c.cnt = 1;
1722 c.id = next_oecount_id++;
1723 c.op = oe1->op;
1724 cvec.safe_push (c);
1725 idx = cvec.length () + 41;
1726 slot = ctable.find_slot (idx, INSERT);
1727 if (!*slot)
1729 *slot = idx;
1731 else
1733 cvec.pop ();
1734 cvec[*slot - 42].cnt++;
1739 /* Sort the counting table. */
1740 cvec.qsort (oecount_cmp);
1742 if (dump_file && (dump_flags & TDF_DETAILS))
1744 oecount *c;
1745 fprintf (dump_file, "Candidates:\n");
1746 FOR_EACH_VEC_ELT (cvec, j, c)
1748 fprintf (dump_file, " %u %s: ", c->cnt,
1749 c->oecode == MULT_EXPR
1750 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1751 print_generic_expr (dump_file, c->op);
1752 fprintf (dump_file, "\n");
1756 /* Process the (operand, code) pairs in order of most occurrence. */
1757 auto_sbitmap candidates2 (length);
1758 while (!cvec.is_empty ())
1760 oecount *c = &cvec.last ();
1761 if (c->cnt < 2)
1762 break;
1764 /* Now collect the operands in the outer chain that contain
1765 the common operand in their inner chain. */
1766 bitmap_clear (candidates2);
1767 nr_candidates2 = 0;
1768 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1770 gimple *oedef;
1771 enum tree_code oecode;
1772 unsigned j;
1773 tree op = (*ops)[i]->op;
1775 /* If we undistributed in this chain already this may be
1776 a constant. */
1777 if (TREE_CODE (op) != SSA_NAME)
1778 continue;
1780 oedef = SSA_NAME_DEF_STMT (op);
1781 oecode = gimple_assign_rhs_code (oedef);
1782 if (oecode != c->oecode)
1783 continue;
1785 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1787 if (oe1->op == c->op)
1789 bitmap_set_bit (candidates2, i);
1790 ++nr_candidates2;
1791 break;
1796 if (nr_candidates2 >= 2)
1798 operand_entry *oe1, *oe2;
1799 gimple *prod;
1800 int first = bitmap_first_set_bit (candidates2);
1802 /* Build the new addition chain. */
1803 oe1 = (*ops)[first];
1804 if (dump_file && (dump_flags & TDF_DETAILS))
1806 fprintf (dump_file, "Building (");
1807 print_generic_expr (dump_file, oe1->op);
1809 zero_one_operation (&oe1->op, c->oecode, c->op);
1810 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1812 gimple *sum;
1813 oe2 = (*ops)[i];
1814 if (dump_file && (dump_flags & TDF_DETAILS))
1816 fprintf (dump_file, " + ");
1817 print_generic_expr (dump_file, oe2->op);
1819 zero_one_operation (&oe2->op, c->oecode, c->op);
1820 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1821 oe1->op, oe2->op, opcode);
1822 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1823 oe2->rank = 0;
1824 oe1->op = gimple_get_lhs (sum);
1827 /* Apply the multiplication/division. */
1828 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1829 oe1->op, c->op, c->oecode);
1830 if (dump_file && (dump_flags & TDF_DETAILS))
1832 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1833 print_generic_expr (dump_file, c->op);
1834 fprintf (dump_file, "\n");
1837 /* Record it in the addition chain and disable further
1838 undistribution with this op. */
1839 oe1->op = gimple_assign_lhs (prod);
1840 oe1->rank = get_rank (oe1->op);
1841 subops[first].release ();
1843 changed = true;
1846 cvec.pop ();
1849 for (i = 0; i < ops->length (); ++i)
1850 subops[i].release ();
1851 free (subops);
1852 cvec.release ();
1854 return changed;
1857 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1858 first: element index for each relevant BIT_FIELD_REF.
1859 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1860 typedef std::pair<unsigned, unsigned> v_info_elem;
1861 struct v_info {
1862 tree vec_type;
1863 auto_vec<v_info_elem, 32> vec;
1865 typedef v_info *v_info_ptr;
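/* For example (hypothetical SSA names): if (*ops)[5] is defined by
   _1 = BIT_FIELD_REF <v_2, 32, 64> and the vector's element type is 32
   bits wide, the v_info_elem recorded for v_2 is the pair (2, 5):
   element index 64 / 32 == 2, operand index 5.  */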
1867 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1868 static int
1869 sort_by_mach_mode (const void *p_i, const void *p_j)
1871 const tree tr1 = *((const tree *) p_i);
1872 const tree tr2 = *((const tree *) p_j);
1873 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1874 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1875 if (mode1 > mode2)
1876 return 1;
1877 else if (mode1 < mode2)
1878 return -1;
1879 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1880 return -1;
1881 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1882 return 1;
1883 return 0;
1886 /* Cleanup hash map for VECTOR information. */
1887 static void
1888 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1890 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1891 it != info_map.end (); ++it)
1893 v_info_ptr info = (*it).second;
1894 delete info;
1895 (*it).second = NULL;
1899 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1900 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1901 is transformed to
1902 Vs = (V1 + V2 + ... + Vn)
1903 Vs[0] + Vs[1] + ... + Vs[k]
1905 The basic steps are listed below:
1907 1) Check the addition chain *OPS, looking for summands coming from a
1908 BIT_FIELD_REF on a VECTOR type. Put the information into
1909 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1911 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1912 contiguous and cover the whole VECTOR perfectly without any holes.
1913 Obtain one VECTOR list which contains the candidates to be transformed.
1915 3) Sort the VECTOR list by machine mode of the VECTOR type; for each group
1916 of candidates with the same mode, build the addition statements for them
1917 and generate BIT_FIELD_REFs accordingly.
1919 TODO:
1920 The current implementation requires that each whole VECTOR be fully
1921 covered. It could be extended to support partial coverage (adjacent
1922 elements that do not fill the whole vector), but that would need some
1923 cost model to decide where to draw the boundary. */
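/* A sketch of the intended effect, assuming a vector type with four
   32-bit elements and hypothetical SSA names:

     _1 = BIT_FIELD_REF <v1_3(D), 32, 0>;
     _2 = BIT_FIELD_REF <v1_3(D), 32, 32>;
     ... likewise for lanes 2 and 3 of v1 and all four lanes of v2 ...
     sum = _1 + _2 + ... ;

   becomes a single vector addition vs = v1 + v2 followed by four
   BIT_FIELD_REFs on vs feeding the (now shorter) scalar addition
   chain.  */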
1925 static bool
1926 undistribute_bitref_for_vector (enum tree_code opcode,
1927 vec<operand_entry *> *ops, struct loop *loop)
1929 if (ops->length () <= 1)
1930 return false;
1932 if (opcode != PLUS_EXPR
1933 && opcode != MULT_EXPR
1934 && opcode != BIT_XOR_EXPR
1935 && opcode != BIT_IOR_EXPR
1936 && opcode != BIT_AND_EXPR)
1937 return false;
1939 hash_map<tree, v_info_ptr> v_info_map;
1940 operand_entry *oe1;
1941 unsigned i;
1943 /* Find the summands in the addition chain that come from a VECTOR
1944 BIT_FIELD_REF and record their information in the map. */
1945 FOR_EACH_VEC_ELT (*ops, i, oe1)
1947 enum tree_code dcode;
1948 gimple *oe1def;
1950 if (TREE_CODE (oe1->op) != SSA_NAME)
1951 continue;
1952 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1953 if (!is_gimple_assign (oe1def))
1954 continue;
1955 dcode = gimple_assign_rhs_code (oe1def);
1956 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1957 continue;
1959 tree rhs = gimple_assign_rhs1 (oe1def);
1960 tree vec = TREE_OPERAND (rhs, 0);
1961 tree vec_type = TREE_TYPE (vec);
1963 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1964 continue;
1966 /* Ignore it if target machine can't support this VECTOR type. */
1967 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1968 continue;
1970 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1971 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1972 continue;
1974 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1975 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1976 continue;
1978 /* The type of BIT_FIELD_REF might not be equal to the element type of
1979 the vector. We want to use a vector type with element type the
1980 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1981 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1983 machine_mode simd_mode;
1984 unsigned HOST_WIDE_INT size, nunits;
1985 unsigned HOST_WIDE_INT elem_size
1986 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1987 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1988 continue;
1989 if (size <= elem_size || (size % elem_size) != 0)
1990 continue;
1991 nunits = size / elem_size;
1992 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1993 nunits).exists (&simd_mode))
1994 continue;
1995 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1997 /* Ignore it if target machine can't support this VECTOR type. */
1998 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1999 continue;
2001 /* Check const vector type, constrain BIT_FIELD_REF offset and
2002 size. */
2003 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2004 continue;
2006 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2007 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2008 continue;
2011 tree elem_type = TREE_TYPE (vec_type);
2012 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2013 if (maybe_ne (bit_field_size (rhs), elem_size))
2014 continue;
2016 unsigned idx;
2017 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2018 continue;
2020 /* Ignore it if target machine can't support this type of VECTOR
2021 operation. */
2022 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2023 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2024 continue;
2026 bool existed;
2027 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2028 if (!existed)
2030 info = new v_info;
2031 info->vec_type = vec_type;
2033 else if (!types_compatible_p (vec_type, info->vec_type))
2034 continue;
2035 info->vec.safe_push (std::make_pair (idx, i));
2038 /* We need at least two VECTORs to combine. */
2039 if (v_info_map.elements () <= 1)
2041 cleanup_vinfo_map (v_info_map);
2042 return false;
2045 /* Verify all VECTOR candidates by checking two conditions:
2046 1) sorted offsets are adjacent, no holes.
2047 2) can fill the whole VECTOR perfectly.
2048 And add the valid candidates to a vector for further handling. */
2049 auto_vec<tree> valid_vecs (v_info_map.elements ());
2050 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2051 it != v_info_map.end (); ++it)
2053 tree cand_vec = (*it).first;
2054 v_info_ptr cand_info = (*it).second;
2055 unsigned int num_elems
2056 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2057 if (cand_info->vec.length () != num_elems)
2058 continue;
2059 sbitmap holes = sbitmap_alloc (num_elems);
2060 bitmap_ones (holes);
2061 bool valid = true;
2062 v_info_elem *curr;
2063 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2065 if (!bitmap_bit_p (holes, curr->first))
2067 valid = false;
2068 break;
2070 else
2071 bitmap_clear_bit (holes, curr->first);
2073 if (valid && bitmap_empty_p (holes))
2074 valid_vecs.quick_push (cand_vec);
2075 sbitmap_free (holes);
2078 /* We need at least two VECTORs to combine. */
2079 if (valid_vecs.length () <= 1)
2081 cleanup_vinfo_map (v_info_map);
2082 return false;
2085 valid_vecs.qsort (sort_by_mach_mode);
2086 /* Go through all candidates in machine mode order and, for each group of
2087 candidates sharing a mode, build one combined sum; single ones are skipped. */
2088 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2090 tree tvec = valid_vecs[i];
2091 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2093 /* Skip modes with only a single candidate. */
2094 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2095 continue;
2097 unsigned int idx, j;
2098 gimple *sum = NULL;
2099 tree sum_vec = tvec;
2100 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2101 v_info_elem *elem;
2102 tree vec_type = info_ptr->vec_type;
2104 /* Build the sum for all candidates with same mode. */
2107 sum = build_and_add_sum (vec_type, sum_vec,
2108 valid_vecs[i + 1], opcode);
2109 /* Update the operands only after build_and_add_sum,
2110 so that we don't have to repeat the placement algorithm
2111 of build_and_add_sum. */
2112 if (sum_vec == tvec
2113 && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2115 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2116 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2117 tree lhs = make_ssa_name (vec_type);
2118 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2119 gimple_set_uid (g, gimple_uid (sum));
2120 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2121 gimple_assign_set_rhs1 (sum, lhs);
2122 update_stmt (sum);
2124 if (!useless_type_conversion_p (vec_type,
2125 TREE_TYPE (valid_vecs[i + 1])))
2127 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2128 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2129 valid_vecs[i + 1]);
2130 tree lhs = make_ssa_name (vec_type);
2131 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2132 gimple_set_uid (g, gimple_uid (sum));
2133 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2134 gimple_assign_set_rhs2 (sum, lhs);
2135 update_stmt (sum);
2137 sum_vec = gimple_get_lhs (sum);
2138 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2139 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2140 /* Update those related ops of current candidate VECTOR. */
2141 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2143 idx = elem->second;
2144 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2145 /* Set this so that the op definition will get DCEd later. */
2146 gimple_set_visited (def, true);
2147 if (opcode == PLUS_EXPR
2148 || opcode == BIT_XOR_EXPR
2149 || opcode == BIT_IOR_EXPR)
2150 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2151 else if (opcode == MULT_EXPR)
2152 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2153 else
2155 gcc_assert (opcode == BIT_AND_EXPR);
2156 (*ops)[idx]->op
2157 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2159 (*ops)[idx]->rank = 0;
2161 if (dump_file && (dump_flags & TDF_DETAILS))
2163 fprintf (dump_file, "Generating addition -> ");
2164 print_gimple_stmt (dump_file, sum, 0);
2166 i++;
2168 while ((i < valid_vecs.length () - 1)
2169 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2171 /* Referring to the first valid VECTOR with this mode, generate the
2172 BIT_FIELD_REF statements accordingly. */
2173 info_ptr = *(v_info_map.get (tvec));
2174 gcc_assert (sum);
2175 tree elem_type = TREE_TYPE (vec_type);
2176 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2178 idx = elem->second;
2179 tree dst = make_ssa_name (elem_type);
2180 tree pos = bitsize_int (elem->first
2181 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2182 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2183 TYPE_SIZE (elem_type), pos);
2184 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2185 insert_stmt_after (gs, sum);
2186 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2187 /* Set this so that the op definition will get DCEd later. */
2188 gimple_set_visited (def, true);
2189 (*ops)[idx]->op = gimple_assign_lhs (gs);
2190 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2191 if (dump_file && (dump_flags & TDF_DETAILS))
2193 fprintf (dump_file, "Generating bit_field_ref -> ");
2194 print_gimple_stmt (dump_file, gs, 0);
2199 if (dump_file && (dump_flags & TDF_DETAILS))
2200 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2202 cleanup_vinfo_map (v_info_map);
2204 return true;
2207 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2208 expression, examine the other OPS to see if any of them are comparisons
2209 of the same values, which we may be able to combine or eliminate.
2210 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
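/* A minimal source-level sketch (the function name is made up for the
   example):

     _Bool
     less_or_equal (int a, int b)
     {
       return a < b || a == b;
     }

   Here both operands of the BIT_IOR_EXPR compare the same values, so the
   pair is expected to collapse into the single test a <= b.  */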
2212 static bool
2213 eliminate_redundant_comparison (enum tree_code opcode,
2214 vec<operand_entry *> *ops,
2215 unsigned int currindex,
2216 operand_entry *curr)
2218 tree op1, op2;
2219 enum tree_code lcode, rcode;
2220 gimple *def1, *def2;
2221 int i;
2222 operand_entry *oe;
2224 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2225 return false;
2227 /* Check that CURR is a comparison. */
2228 if (TREE_CODE (curr->op) != SSA_NAME)
2229 return false;
2230 def1 = SSA_NAME_DEF_STMT (curr->op);
2231 if (!is_gimple_assign (def1))
2232 return false;
2233 lcode = gimple_assign_rhs_code (def1);
2234 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2235 return false;
2236 op1 = gimple_assign_rhs1 (def1);
2237 op2 = gimple_assign_rhs2 (def1);
2239 /* Now look for a similar comparison in the remaining OPS. */
2240 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2242 tree t;
2244 if (TREE_CODE (oe->op) != SSA_NAME)
2245 continue;
2246 def2 = SSA_NAME_DEF_STMT (oe->op);
2247 if (!is_gimple_assign (def2))
2248 continue;
2249 rcode = gimple_assign_rhs_code (def2);
2250 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2251 continue;
2253 /* If we got here, we have a match. See if we can combine the
2254 two comparisons. */
2255 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2256 if (opcode == BIT_IOR_EXPR)
2257 t = maybe_fold_or_comparisons (type,
2258 lcode, op1, op2,
2259 rcode, gimple_assign_rhs1 (def2),
2260 gimple_assign_rhs2 (def2));
2261 else
2262 t = maybe_fold_and_comparisons (type,
2263 lcode, op1, op2,
2264 rcode, gimple_assign_rhs1 (def2),
2265 gimple_assign_rhs2 (def2));
2266 if (!t)
2267 continue;
2269 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2270 always give us a boolean_type_node value back. If the original
2271 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2272 we need to convert. */
2273 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2275 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2276 continue;
2277 t = fold_convert (TREE_TYPE (curr->op), t);
2280 if (TREE_CODE (t) != INTEGER_CST
2281 && !operand_equal_p (t, curr->op, 0))
2283 enum tree_code subcode;
2284 tree newop1, newop2;
2285 if (!COMPARISON_CLASS_P (t))
2286 continue;
2287 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2288 STRIP_USELESS_TYPE_CONVERSION (newop1);
2289 STRIP_USELESS_TYPE_CONVERSION (newop2);
2290 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2291 continue;
2292 if (lcode == TREE_CODE (t)
2293 && operand_equal_p (op1, newop1, 0)
2294 && operand_equal_p (op2, newop2, 0))
2295 t = curr->op;
2296 else if ((TREE_CODE (newop1) == SSA_NAME
2297 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2298 || (TREE_CODE (newop2) == SSA_NAME
2299 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2300 continue;
2303 if (dump_file && (dump_flags & TDF_DETAILS))
2305 fprintf (dump_file, "Equivalence: ");
2306 print_generic_expr (dump_file, curr->op);
2307 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2308 print_generic_expr (dump_file, oe->op);
2309 fprintf (dump_file, " -> ");
2310 print_generic_expr (dump_file, t);
2311 fprintf (dump_file, "\n");
2314 /* Now we can delete oe, as it has been subsumed by the new combined
2315 expression t. */
2316 ops->ordered_remove (i);
2317 reassociate_stats.ops_eliminated++;
2319 /* If t is the same as curr->op, we're done. Otherwise we must
2320 replace curr->op with t. Special case is if we got a constant
2321 back, in which case we add it to the end instead of in place of
2322 the current entry. */
2323 if (TREE_CODE (t) == INTEGER_CST)
2325 ops->ordered_remove (currindex);
2326 add_to_ops_vec (ops, t);
2328 else if (!operand_equal_p (t, curr->op, 0))
2330 gimple *sum;
2331 enum tree_code subcode;
2332 tree newop1;
2333 tree newop2;
2334 gcc_assert (COMPARISON_CLASS_P (t));
2335 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2336 STRIP_USELESS_TYPE_CONVERSION (newop1);
2337 STRIP_USELESS_TYPE_CONVERSION (newop2);
2338 gcc_checking_assert (is_gimple_val (newop1)
2339 && is_gimple_val (newop2));
2340 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2341 curr->op = gimple_get_lhs (sum);
2343 return true;
2346 return false;
2350 /* Transform repeated addition of same values into multiply with
2351 constant. */
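/* A minimal sketch of the transformation (the function name is made up
   for the example):

     int
     five_times (int x)
     {
       return x + x + x + x + x;
     }

   The five identical summands are expected to be replaced by a single
   multiplication, roughly x * 5.  */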
2352 static bool
2353 transform_add_to_multiply (vec<operand_entry *> *ops)
2355 operand_entry *oe;
2356 tree op = NULL_TREE;
2357 int j;
2358 int i, start = -1, end = 0, count = 0;
2359 auto_vec<std::pair <int, int> > indxs;
2360 bool changed = false;
2362 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2363 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2364 || !flag_unsafe_math_optimizations))
2365 return false;
2367 /* Look for repeated operands. */
2368 FOR_EACH_VEC_ELT (*ops, i, oe)
2370 if (start == -1)
2372 count = 1;
2373 op = oe->op;
2374 start = i;
2376 else if (operand_equal_p (oe->op, op, 0))
2378 count++;
2379 end = i;
2381 else
2383 if (count > 1)
2384 indxs.safe_push (std::make_pair (start, end));
2385 count = 1;
2386 op = oe->op;
2387 start = i;
2391 if (count > 1)
2392 indxs.safe_push (std::make_pair (start, end));
2394 for (j = indxs.length () - 1; j >= 0; --j)
2396 /* Convert repeated operand addition to multiplication. */
2397 start = indxs[j].first;
2398 end = indxs[j].second;
2399 op = (*ops)[start]->op;
2400 count = end - start + 1;
2401 for (i = end; i >= start; --i)
2402 ops->unordered_remove (i);
2403 tree tmp = make_ssa_name (TREE_TYPE (op));
2404 tree cst = build_int_cst (integer_type_node, count);
2405 gassign *mul_stmt
2406 = gimple_build_assign (tmp, MULT_EXPR,
2407 op, fold_convert (TREE_TYPE (op), cst));
2408 gimple_set_visited (mul_stmt, true);
2409 add_to_ops_vec (ops, tmp, mul_stmt);
2410 changed = true;
2413 return changed;
2417 /* Perform various identities and other optimizations on the list of
2418 operand entries, stored in OPS. The tree code for the binary
2419 operation between all the operands is OPCODE. */
2421 static void
2422 optimize_ops_list (enum tree_code opcode,
2423 vec<operand_entry *> *ops)
2425 unsigned int length = ops->length ();
2426 unsigned int i;
2427 operand_entry *oe;
2428 operand_entry *oelast = NULL;
2429 bool iterate = false;
2431 if (length == 1)
2432 return;
2434 oelast = ops->last ();
2436 /* If the last two are constants, pop the constants off, merge them
2437 and try the next two. */
2438 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2440 operand_entry *oelm1 = (*ops)[length - 2];
2442 if (oelm1->rank == 0
2443 && is_gimple_min_invariant (oelm1->op)
2444 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2445 TREE_TYPE (oelast->op)))
2447 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2448 oelm1->op, oelast->op);
2450 if (folded && is_gimple_min_invariant (folded))
2452 if (dump_file && (dump_flags & TDF_DETAILS))
2453 fprintf (dump_file, "Merging constants\n");
2455 ops->pop ();
2456 ops->pop ();
2458 add_to_ops_vec (ops, folded);
2459 reassociate_stats.constants_eliminated++;
2461 optimize_ops_list (opcode, ops);
2462 return;
2467 eliminate_using_constants (opcode, ops);
2468 oelast = NULL;
2470 for (i = 0; ops->iterate (i, &oe);)
2472 bool done = false;
2474 if (eliminate_not_pairs (opcode, ops, i, oe))
2475 return;
2476 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2477 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2478 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2480 if (done)
2481 return;
2482 iterate = true;
2483 oelast = NULL;
2484 continue;
2486 oelast = oe;
2487 i++;
2490 if (iterate)
2491 optimize_ops_list (opcode, ops);
2494 /* The following functions are subroutines to optimize_range_tests and allow
2495 it to try to change a logical combination of comparisons into a range
2496 test.
2498 For example, both
2499 X == 2 || X == 5 || X == 3 || X == 4
2500 and
2501 X >= 2 && X <= 5
2502 are converted to
2503 (unsigned) (X - 2) <= 3
2505 For more information see comments above fold_test_range in fold-const.cc,
2506 this implementation is for GIMPLE. */
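/* A minimal source-level sketch (the function name is made up for the
   example):

     _Bool
     in_small_set (unsigned x)
     {
       return x == 2 || x == 5 || x == 3 || x == 4;
     }

   is expected to end up testing the single range above, roughly
   (x - 2) <= 3 on the unsigned type, instead of four comparisons.  */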
2510 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2512 void
2513 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2515 if (!skip_exp)
2516 print_generic_expr (file, r->exp);
2517 fprintf (file, " %c[", r->in_p ? '+' : '-');
2518 print_generic_expr (file, r->low);
2519 fputs (", ", file);
2520 print_generic_expr (file, r->high);
2521 fputc (']', file);
2524 /* Dump the range entry R to STDERR. */
2526 DEBUG_FUNCTION void
2527 debug_range_entry (struct range_entry *r)
2529 dump_range_entry (stderr, r, false);
2530 fputc ('\n', stderr);
2533 /* This is similar to make_range in fold-const.cc, but on top of
2534 GIMPLE instead of trees. If EXP is non-NULL, it should be
2535 an SSA_NAME and STMT argument is ignored, otherwise STMT
2536 argument should be a GIMPLE_COND. */
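/* For illustration (the SSA names are made up): starting from EXP
   defined as _1 = x_5 > 7, the loop below refines the initial
   "EXP != 0" range step by step via make_range_step, ending up with
   r->exp = x_5 and roughly the range +[8, -], i.e. x_5 >= 8.  */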
2538 void
2539 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2541 int in_p;
2542 tree low, high;
2543 bool is_bool, strict_overflow_p;
2545 r->exp = NULL_TREE;
2546 r->in_p = false;
2547 r->strict_overflow_p = false;
2548 r->low = NULL_TREE;
2549 r->high = NULL_TREE;
2550 if (exp != NULL_TREE
2551 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2552 return;
2554 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2555 and see if we can refine the range. Some of the cases below may not
2556 happen, but it doesn't seem worth worrying about this. We "continue"
2557 the outer loop when we've changed something; otherwise we "break"
2558 the switch, which will "break" the while. */
2559 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2560 high = low;
2561 in_p = 0;
2562 strict_overflow_p = false;
2563 is_bool = false;
2564 if (exp == NULL_TREE)
2565 is_bool = true;
2566 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2568 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2569 is_bool = true;
2570 else
2571 return;
2573 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2574 is_bool = true;
2576 while (1)
2578 enum tree_code code;
2579 tree arg0, arg1, exp_type;
2580 tree nexp;
2581 location_t loc;
2583 if (exp != NULL_TREE)
2585 if (TREE_CODE (exp) != SSA_NAME
2586 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2587 break;
2589 stmt = SSA_NAME_DEF_STMT (exp);
2590 if (!is_gimple_assign (stmt))
2591 break;
2593 code = gimple_assign_rhs_code (stmt);
2594 arg0 = gimple_assign_rhs1 (stmt);
2595 arg1 = gimple_assign_rhs2 (stmt);
2596 exp_type = TREE_TYPE (exp);
2598 else
2600 code = gimple_cond_code (stmt);
2601 arg0 = gimple_cond_lhs (stmt);
2602 arg1 = gimple_cond_rhs (stmt);
2603 exp_type = boolean_type_node;
2606 if (TREE_CODE (arg0) != SSA_NAME
2607 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)
2608 || ssa_name_maybe_undef_p (arg0))
2609 break;
2610 loc = gimple_location (stmt);
2611 switch (code)
2613 case BIT_NOT_EXPR:
2614 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2615 /* Ensure the range is either +[-,0], +[0,0],
2616 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2617 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2618 or similar expression of unconditional true or
2619 false, it should not be negated. */
2620 && ((high && integer_zerop (high))
2621 || (low && integer_onep (low))))
2623 in_p = !in_p;
2624 exp = arg0;
2625 continue;
2627 break;
2628 case SSA_NAME:
2629 exp = arg0;
2630 continue;
2631 CASE_CONVERT:
2632 if (is_bool)
2634 if ((TYPE_PRECISION (exp_type) == 1
2635 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2636 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2637 return;
2639 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2641 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2642 is_bool = true;
2643 else
2644 return;
2646 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2647 is_bool = true;
2648 goto do_default;
2649 case EQ_EXPR:
2650 case NE_EXPR:
2651 case LT_EXPR:
2652 case LE_EXPR:
2653 case GE_EXPR:
2654 case GT_EXPR:
2655 is_bool = true;
2656 /* FALLTHRU */
2657 default:
2658 if (!is_bool)
2659 return;
2660 do_default:
2661 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2662 &low, &high, &in_p,
2663 &strict_overflow_p);
2664 if (nexp != NULL_TREE)
2666 exp = nexp;
2667 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2668 continue;
2670 break;
2672 break;
2674 if (is_bool)
2676 r->exp = exp;
2677 r->in_p = in_p;
2678 r->low = low;
2679 r->high = high;
2680 r->strict_overflow_p = strict_overflow_p;
2684 /* Comparison function for qsort. Sort entries
2685 without SSA_NAME exp first, then with SSA_NAMEs sorted
2686 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2687 by increasing ->low and if ->low is the same, by increasing
2688 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2689 maximum. */
2691 static int
2692 range_entry_cmp (const void *a, const void *b)
2694 const struct range_entry *p = (const struct range_entry *) a;
2695 const struct range_entry *q = (const struct range_entry *) b;
2697 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2699 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2701 /* Group range_entries for the same SSA_NAME together. */
2702 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2703 return -1;
2704 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2705 return 1;
2706 /* If ->low is different, NULL low goes first, then by
2707 ascending low. */
2708 if (p->low != NULL_TREE)
2710 if (q->low != NULL_TREE)
2712 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2713 p->low, q->low);
2714 if (tem && integer_onep (tem))
2715 return -1;
2716 tem = fold_binary (GT_EXPR, boolean_type_node,
2717 p->low, q->low);
2718 if (tem && integer_onep (tem))
2719 return 1;
2721 else
2722 return 1;
2724 else if (q->low != NULL_TREE)
2725 return -1;
2726 /* If ->high is different, NULL high goes last, before that by
2727 ascending high. */
2728 if (p->high != NULL_TREE)
2730 if (q->high != NULL_TREE)
2732 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2733 p->high, q->high);
2734 if (tem && integer_onep (tem))
2735 return -1;
2736 tem = fold_binary (GT_EXPR, boolean_type_node,
2737 p->high, q->high);
2738 if (tem && integer_onep (tem))
2739 return 1;
2741 else
2742 return -1;
2744 else if (q->high != NULL_TREE)
2745 return 1;
2746 /* If both ranges are the same, sort below by ascending idx. */
2748 else
2749 return 1;
2751 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2752 return -1;
2754 if (p->idx < q->idx)
2755 return -1;
2756 else
2758 gcc_checking_assert (p->idx > q->idx);
2759 return 1;
2763 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2764 insert needed statements BEFORE or after GSI. */
2766 static tree
2767 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2769 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2770 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2771 if (TREE_CODE (ret) != SSA_NAME)
2773 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2774 if (before)
2775 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2776 else
2777 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2778 ret = gimple_assign_lhs (g);
2780 return ret;
2783 /* Helper routine of optimize_range_test.
2784 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2785 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2786 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2787 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2788 an array of COUNT pointers to other ranges. Return
2789 true if the range merge has been successful.
2790 If OPCODE is ERROR_MARK, this is called from within
2791 maybe_optimize_range_tests and is performing inter-bb range optimization.
2792 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2793 oe->rank. */
2795 static bool
2796 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2797 struct range_entry **otherrangep,
2798 unsigned int count, enum tree_code opcode,
2799 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2800 bool in_p, tree low, tree high, bool strict_overflow_p)
2802 unsigned int idx = range->idx;
2803 struct range_entry *swap_with = NULL;
2804 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2805 if (opcode == ERROR_MARK)
2807 /* For inter-bb range test optimization, pick from the range tests
2808 the one which is tested in the earliest condition (one dominating
2809 the others), because otherwise there could be some UB (e.g. signed
2810 overflow) in following bbs that we'd expose which wasn't there in
2811 the original program. See PR104196. */
2812 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2813 basic_block range_bb = orig_range_bb;
2814 for (unsigned int i = 0; i < count; i++)
2816 struct range_entry *this_range;
2817 if (otherrange)
2818 this_range = otherrange + i;
2819 else
2820 this_range = otherrangep[i];
2821 operand_entry *oe = (*ops)[this_range->idx];
2822 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2823 if (range_bb != this_bb
2824 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2826 swap_with = this_range;
2827 range_bb = this_bb;
2828 idx = this_range->idx;
2831 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2832 only defined in later blocks. In this case we can't move the
2833 merged comparison earlier, so instead check if there are any stmts
2834 that might trigger signed integer overflow in between and rewrite
2835 them. But only after we check if the optimization is possible. */
2836 if (seq && swap_with)
2838 rewrite_bb_first = range_bb;
2839 rewrite_bb_last = orig_range_bb;
2840 idx = range->idx;
2841 swap_with = NULL;
2844 operand_entry *oe = (*ops)[idx];
2845 tree op = oe->op;
2846 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2847 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2848 location_t loc = gimple_location (stmt);
2849 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2850 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2851 in_p, low, high);
2852 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2853 gimple_stmt_iterator gsi;
2854 unsigned int i, uid;
2856 if (tem == NULL_TREE)
2857 return false;
2859 /* If op is default def SSA_NAME, there is no place to insert the
2860 new comparison. Give up, unless we can use OP itself as the
2861 range test. */
2862 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2864 if (op == range->exp
2865 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2866 || TREE_CODE (optype) == BOOLEAN_TYPE)
2867 && (op == tem
2868 || (TREE_CODE (tem) == EQ_EXPR
2869 && TREE_OPERAND (tem, 0) == op
2870 && integer_onep (TREE_OPERAND (tem, 1))))
2871 && opcode != BIT_IOR_EXPR
2872 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2874 stmt = NULL;
2875 tem = op;
2877 else
2878 return false;
2881 if (swap_with)
2882 std::swap (range->idx, swap_with->idx);
2884 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2885 warning_at (loc, OPT_Wstrict_overflow,
2886 "assuming signed overflow does not occur "
2887 "when simplifying range test");
2889 if (dump_file && (dump_flags & TDF_DETAILS))
2891 struct range_entry *r;
2892 fprintf (dump_file, "Optimizing range tests ");
2893 dump_range_entry (dump_file, range, false);
2894 for (i = 0; i < count; i++)
2896 if (otherrange)
2897 r = otherrange + i;
2898 else
2899 r = otherrangep[i];
2900 if (r->exp
2901 && r->exp != range->exp
2902 && TREE_CODE (r->exp) == SSA_NAME)
2904 fprintf (dump_file, " and ");
2905 dump_range_entry (dump_file, r, false);
2907 else
2909 fprintf (dump_file, " and");
2910 dump_range_entry (dump_file, r, true);
2913 fprintf (dump_file, "\n into ");
2914 print_generic_expr (dump_file, tem);
2915 fprintf (dump_file, "\n");
2918 /* In inter-bb range optimization mode, if we have a seq, we can't
2919 move the merged comparison to the earliest bb from the comparisons
2920 being replaced, so instead rewrite stmts that could trigger signed
2921 integer overflow. */
2922 for (basic_block bb = rewrite_bb_last;
2923 bb != rewrite_bb_first; bb = single_pred (bb))
2924 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2925 !gsi_end_p (gsi); gsi_next (&gsi))
2927 gimple *stmt = gsi_stmt (gsi);
2928 if (is_gimple_assign (stmt))
2929 if (tree lhs = gimple_assign_lhs (stmt))
2930 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2931 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2932 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2934 enum tree_code code = gimple_assign_rhs_code (stmt);
2935 if (arith_code_with_undefined_signed_overflow (code))
2937 gimple_stmt_iterator gsip = gsi;
2938 gimple_stmt_iterator gsin = gsi;
2939 gsi_prev (&gsip);
2940 gsi_next (&gsin);
2941 rewrite_to_defined_overflow (&gsi);
2942 unsigned uid = gimple_uid (stmt);
2943 if (gsi_end_p (gsip))
2944 gsip = gsi_after_labels (bb);
2945 else
2946 gsi_next (&gsip);
2947 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2948 gsi_next (&gsip))
2949 gimple_set_uid (gsi_stmt (gsip), uid);
2954 if (opcode == BIT_IOR_EXPR
2955 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2956 tem = invert_truthvalue_loc (loc, tem);
2958 tem = fold_convert_loc (loc, optype, tem);
2959 if (stmt)
2961 gsi = gsi_for_stmt (stmt);
2962 uid = gimple_uid (stmt);
2964 else
2966 gsi = gsi_none ();
2967 uid = 0;
2969 if (stmt == NULL)
2970 gcc_checking_assert (tem == op);
2971 /* In rare cases range->exp can be equal to lhs of stmt.
2972 In that case we have to insert after the stmt rather than before
2973 it. If stmt is a PHI, insert it at the start of the basic block. */
2974 else if (op != range->exp)
2976 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2977 tem = force_into_ssa_name (&gsi, tem, true);
2978 gsi_prev (&gsi);
2980 else if (gimple_code (stmt) != GIMPLE_PHI)
2982 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2983 tem = force_into_ssa_name (&gsi, tem, false);
2985 else
2987 gsi = gsi_after_labels (gimple_bb (stmt));
2988 if (!gsi_end_p (gsi))
2989 uid = gimple_uid (gsi_stmt (gsi));
2990 else
2992 gsi = gsi_start_bb (gimple_bb (stmt));
2993 uid = 1;
2994 while (!gsi_end_p (gsi))
2996 uid = gimple_uid (gsi_stmt (gsi));
2997 gsi_next (&gsi);
3000 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3001 tem = force_into_ssa_name (&gsi, tem, true);
3002 if (gsi_end_p (gsi))
3003 gsi = gsi_last_bb (gimple_bb (stmt));
3004 else
3005 gsi_prev (&gsi);
3007 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3008 if (gimple_uid (gsi_stmt (gsi)))
3009 break;
3010 else
3011 gimple_set_uid (gsi_stmt (gsi), uid);
3013 oe->op = tem;
3014 range->exp = exp;
3015 range->low = low;
3016 range->high = high;
3017 range->in_p = in_p;
3018 range->strict_overflow_p = false;
3020 for (i = 0; i < count; i++)
3022 if (otherrange)
3023 range = otherrange + i;
3024 else
3025 range = otherrangep[i];
3026 oe = (*ops)[range->idx];
3027 /* Now change all the other range test immediate uses, so that
3028 those tests will be optimized away. */
3029 if (opcode == ERROR_MARK)
3031 if (oe->op)
3032 oe->op = build_int_cst (TREE_TYPE (oe->op),
3033 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3034 else
3035 oe->op = (oe->rank == BIT_IOR_EXPR
3036 ? boolean_false_node : boolean_true_node);
3038 else
3039 oe->op = error_mark_node;
3040 range->exp = NULL_TREE;
3041 range->low = NULL_TREE;
3042 range->high = NULL_TREE;
3044 return true;
3047 /* Optimize X == CST1 || X == CST2
3048 if popcount (CST1 ^ CST2) == 1 into
3049 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3050 Similarly for ranges. E.g.
3051 X != 2 && X != 3 && X != 10 && X != 11
3052 will be transformed by the previous optimization into
3053 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3054 and this loop can transform that into
3055 !(((X & ~8) - 2U) <= 1U). */
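/* A small worked instance of the check above, using the values from the
   comment: for the ranges [2, 3] and [10, 11] we have
   lowi ^ lowj == 2 ^ 10 == 8 and highi ^ highj == 3 ^ 11 == 8, a single
   set bit, so bit 3 can be masked away and both ranges collapse into
   (X & ~8) being tested against [2, 3].  */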
3057 static bool
3058 optimize_range_tests_xor (enum tree_code opcode, tree type,
3059 tree lowi, tree lowj, tree highi, tree highj,
3060 vec<operand_entry *> *ops,
3061 struct range_entry *rangei,
3062 struct range_entry *rangej)
3064 tree lowxor, highxor, tem, exp;
3065 /* Check lowi ^ lowj == highi ^ highj and
3066 popcount (lowi ^ lowj) == 1. */
3067 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3068 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3069 return false;
3070 if (!integer_pow2p (lowxor))
3071 return false;
3072 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3073 if (!tree_int_cst_equal (lowxor, highxor))
3074 return false;
3076 exp = rangei->exp;
3077 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3078 int prec = GET_MODE_PRECISION (mode);
3079 if (TYPE_PRECISION (type) < prec
3080 || (wi::to_wide (TYPE_MIN_VALUE (type))
3081 != wi::min_value (prec, TYPE_SIGN (type)))
3082 || (wi::to_wide (TYPE_MAX_VALUE (type))
3083 != wi::max_value (prec, TYPE_SIGN (type))))
3085 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3086 exp = fold_convert (type, exp);
3087 lowxor = fold_convert (type, lowxor);
3088 lowi = fold_convert (type, lowi);
3089 highi = fold_convert (type, highi);
3091 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3092 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3093 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3094 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3095 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3096 NULL, rangei->in_p, lowj, highj,
3097 rangei->strict_overflow_p
3098 || rangej->strict_overflow_p))
3099 return true;
3100 return false;
3103 /* Optimize X == CST1 || X == CST2
3104 if popcount (CST2 - CST1) == 1 into
3105 ((X - CST1) & ~(CST2 - CST1)) == 0.
3106 Similarly for ranges. E.g.
3107 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3108 || X == 75 || X == 45
3109 will be transformed by the previous optimization into
3110 (X - 43U) <= 3U || (X - 75U) <= 3U
3111 and this loop can transform that into
3112 ((X - 43U) & ~(75U - 43U)) <= 3U. */
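/* A small worked instance using the values from the comment: for the
   ranges [43, 46] and [75, 78] we have highi - lowi == highj - lowj == 3
   and lowj - lowi == 75 - 43 == 32, a power of two, so the two ranges
   merge into ((X - 43U) & ~32U) <= 3U.  */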
3113 static bool
3114 optimize_range_tests_diff (enum tree_code opcode, tree type,
3115 tree lowi, tree lowj, tree highi, tree highj,
3116 vec<operand_entry *> *ops,
3117 struct range_entry *rangei,
3118 struct range_entry *rangej)
3120 tree tem1, tem2, mask;
3121 /* Check highi - lowi == highj - lowj. */
3122 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3123 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3124 return false;
3125 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3126 if (!tree_int_cst_equal (tem1, tem2))
3127 return false;
3128 /* Check popcount (lowj - lowi) == 1. */
3129 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3130 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3131 return false;
3132 if (!integer_pow2p (tem1))
3133 return false;
3135 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3136 int prec = GET_MODE_PRECISION (mode);
3137 if (TYPE_PRECISION (type) < prec
3138 || (wi::to_wide (TYPE_MIN_VALUE (type))
3139 != wi::min_value (prec, TYPE_SIGN (type)))
3140 || (wi::to_wide (TYPE_MAX_VALUE (type))
3141 != wi::max_value (prec, TYPE_SIGN (type))))
3142 type = build_nonstandard_integer_type (prec, 1);
3143 else
3144 type = unsigned_type_for (type);
3145 tem1 = fold_convert (type, tem1);
3146 tem2 = fold_convert (type, tem2);
3147 lowi = fold_convert (type, lowi);
3148 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3149 tem1 = fold_build2 (MINUS_EXPR, type,
3150 fold_convert (type, rangei->exp), lowi);
3151 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3152 lowj = build_int_cst (type, 0);
3153 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3154 NULL, rangei->in_p, lowj, tem2,
3155 rangei->strict_overflow_p
3156 || rangej->strict_overflow_p))
3157 return true;
3158 return false;
3161 /* It does some common checks for function optimize_range_tests_xor and
3162 optimize_range_tests_diff.
3163 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3164 Else it calls optimize_range_tests_diff. */
3166 static bool
3167 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3168 bool optimize_xor, vec<operand_entry *> *ops,
3169 struct range_entry *ranges)
3171 int i, j;
3172 bool any_changes = false;
3173 for (i = first; i < length; i++)
3175 tree lowi, highi, lowj, highj, type, tem;
3177 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3178 continue;
3179 type = TREE_TYPE (ranges[i].exp);
3180 if (!INTEGRAL_TYPE_P (type))
3181 continue;
3182 lowi = ranges[i].low;
3183 if (lowi == NULL_TREE)
3184 lowi = TYPE_MIN_VALUE (type);
3185 highi = ranges[i].high;
3186 if (highi == NULL_TREE)
3187 continue;
3188 for (j = i + 1; j < length && j < i + 64; j++)
3190 bool changes;
3191 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3192 continue;
3193 lowj = ranges[j].low;
3194 if (lowj == NULL_TREE)
3195 continue;
3196 highj = ranges[j].high;
3197 if (highj == NULL_TREE)
3198 highj = TYPE_MAX_VALUE (type);
3199 /* Check lowj > highi. */
3200 tem = fold_binary (GT_EXPR, boolean_type_node,
3201 lowj, highi);
3202 if (tem == NULL_TREE || !integer_onep (tem))
3203 continue;
3204 if (optimize_xor)
3205 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3206 highi, highj, ops,
3207 ranges + i, ranges + j);
3208 else
3209 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3210 highi, highj, ops,
3211 ranges + i, ranges + j);
3212 if (changes)
3214 any_changes = true;
3215 break;
3219 return any_changes;
3222 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3223 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3224 EXP on success, NULL otherwise. */
3226 static tree
3227 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3228 wide_int *mask, tree *totallowp)
3230 tree tem = int_const_binop (MINUS_EXPR, high, low);
3231 if (tem == NULL_TREE
3232 || TREE_CODE (tem) != INTEGER_CST
3233 || TREE_OVERFLOW (tem)
3234 || tree_int_cst_sgn (tem) == -1
3235 || compare_tree_int (tem, prec) != -1)
3236 return NULL_TREE;
3238 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3239 *mask = wi::shifted_mask (0, max, false, prec);
3240 if (TREE_CODE (exp) == BIT_AND_EXPR
3241 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3243 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3244 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3245 if (wi::popcount (msk) == 1
3246 && wi::ltu_p (msk, prec - max))
3248 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3249 max += msk.to_uhwi ();
3250 exp = TREE_OPERAND (exp, 0);
3251 if (integer_zerop (low)
3252 && TREE_CODE (exp) == PLUS_EXPR
3253 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3255 tree ret = TREE_OPERAND (exp, 0);
3256 STRIP_NOPS (ret);
3257 widest_int bias
3258 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3259 TYPE_PRECISION (TREE_TYPE (low))));
3260 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3261 if (totallowp)
3263 *totallowp = tbias;
3264 return ret;
3266 else if (!tree_int_cst_lt (totallow, tbias))
3267 return NULL_TREE;
3268 bias = wi::to_widest (tbias);
3269 bias -= wi::to_widest (totallow);
3270 if (bias >= 0 && bias < prec - max)
3272 *mask = wi::lshift (*mask, bias);
3273 return ret;
3278 if (totallowp)
3279 return exp;
3280 if (!tree_int_cst_lt (totallow, low))
3281 return exp;
3282 tem = int_const_binop (MINUS_EXPR, low, totallow);
3283 if (tem == NULL_TREE
3284 || TREE_CODE (tem) != INTEGER_CST
3285 || TREE_OVERFLOW (tem)
3286 || compare_tree_int (tem, prec - max) == 1)
3287 return NULL_TREE;
3289 *mask = wi::lshift (*mask, wi::to_widest (tem));
3290 return exp;
3293 /* Attempt to optimize small range tests using bit test.
3294 E.g.
3295 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3296 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3297 has been by earlier optimizations optimized into:
3298 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3299 As the whole range 43 through 82 spans fewer than 64 values,
3300 for 64-bit word targets optimize that into:
3301 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
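/* For clarity, the constant in the example above can be derived as
   follows: relative to 43 the tested values are 0, 1, 2, 3 and 6
   (i.e. 43..46 and 49) plus 32, 33, 34, 35 and 39 (i.e. 75..78 and 82).
   Setting those bit positions gives 0x4F in the low 32 bits and 0x8F in
   bits 32..39, i.e. the 0x8F0000004FULL constant quoted above.  */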
3303 static bool
3304 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3305 vec<operand_entry *> *ops,
3306 struct range_entry *ranges)
3308 int i, j;
3309 bool any_changes = false;
3310 int prec = GET_MODE_BITSIZE (word_mode);
3311 auto_vec<struct range_entry *, 64> candidates;
3313 for (i = first; i < length - 1; i++)
3315 tree lowi, highi, lowj, highj, type;
3317 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3318 continue;
3319 type = TREE_TYPE (ranges[i].exp);
3320 if (!INTEGRAL_TYPE_P (type))
3321 continue;
3322 lowi = ranges[i].low;
3323 if (lowi == NULL_TREE)
3324 lowi = TYPE_MIN_VALUE (type);
3325 highi = ranges[i].high;
3326 if (highi == NULL_TREE)
3327 continue;
3328 wide_int mask;
3329 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3330 highi, &mask, &lowi);
3331 if (exp == NULL_TREE)
3332 continue;
3333 bool strict_overflow_p = ranges[i].strict_overflow_p;
3334 candidates.truncate (0);
3335 int end = MIN (i + 64, length);
3336 for (j = i + 1; j < end; j++)
3338 tree exp2;
3339 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3340 continue;
3341 if (ranges[j].exp == exp)
3343 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3345 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3346 if (exp2 == exp)
3348 else if (TREE_CODE (exp2) == PLUS_EXPR)
3350 exp2 = TREE_OPERAND (exp2, 0);
3351 STRIP_NOPS (exp2);
3352 if (exp2 != exp)
3353 continue;
3355 else
3356 continue;
3358 else
3359 continue;
3360 lowj = ranges[j].low;
3361 if (lowj == NULL_TREE)
3362 continue;
3363 highj = ranges[j].high;
3364 if (highj == NULL_TREE)
3365 highj = TYPE_MAX_VALUE (type);
3366 wide_int mask2;
3367 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3368 highj, &mask2, NULL);
3369 if (exp2 != exp)
3370 continue;
3371 mask |= mask2;
3372 strict_overflow_p |= ranges[j].strict_overflow_p;
3373 candidates.safe_push (&ranges[j]);
3376 /* If every possible relative value of the expression is a valid shift
3377 amount, then we can merge the entry test into the bit test. In this
3378 case, if we would otherwise need 2 or more comparisons, then use
3379 the bit test; in the other cases, the threshold is 3 comparisons. */
3380 bool entry_test_needed;
3381 int_range_max r;
3382 if (TREE_CODE (exp) == SSA_NAME
3383 && get_range_query (cfun)->range_of_expr (r, exp)
3384 && !r.undefined_p ()
3385 && !r.varying_p ()
3386 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3388 wide_int min = r.lower_bound ();
3389 wide_int ilowi = wi::to_wide (lowi);
3390 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3392 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3393 mask = wi::lshift (mask, ilowi - min);
3395 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3397 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3398 mask = wi::lrshift (mask, min - ilowi);
3400 entry_test_needed = false;
3402 else
3403 entry_test_needed = true;
3404 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3406 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3407 wi::to_widest (lowi)
3408 + prec - 1 - wi::clz (mask));
3409 operand_entry *oe = (*ops)[ranges[i].idx];
3410 tree op = oe->op;
3411 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3412 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3413 (cfun, oe->id));
3414 location_t loc = gimple_location (stmt);
3415 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3417 /* See if it isn't cheaper to pretend the minimum value of the
3418 range is 0, if the maximum value is small enough.
3419 We can then avoid the subtraction of the minimum value, but the
3420 mask constant could perhaps be more expensive. */
3421 if (compare_tree_int (lowi, 0) > 0
3422 && compare_tree_int (high, prec) < 0
3423 && (entry_test_needed || wi::ltu_p (r.upper_bound (), prec)))
3425 int cost_diff;
3426 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3427 rtx reg = gen_raw_REG (word_mode, 10000);
3428 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3429 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3430 GEN_INT (-m)),
3431 word_mode, speed_p);
3432 rtx r = immed_wide_int_const (mask, word_mode);
3433 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3434 word_mode, speed_p);
3435 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3436 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3437 word_mode, speed_p);
3438 if (cost_diff > 0)
3440 mask = wi::lshift (mask, m);
3441 lowi = build_zero_cst (TREE_TYPE (lowi));
3445 tree tem;
3446 if (entry_test_needed)
3448 tem = build_range_check (loc, optype, unshare_expr (exp),
3449 false, lowi, high);
3450 if (tem == NULL_TREE || is_gimple_val (tem))
3451 continue;
3453 else
3454 tem = NULL_TREE;
3455 tree etype = unsigned_type_for (TREE_TYPE (exp));
3456 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3457 fold_convert_loc (loc, etype, exp),
3458 fold_convert_loc (loc, etype, lowi));
3459 exp = fold_convert_loc (loc, integer_type_node, exp);
3460 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3461 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3462 build_int_cst (word_type, 1), exp);
3463 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3464 wide_int_to_tree (word_type, mask));
3465 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3466 build_zero_cst (word_type));
3467 if (is_gimple_val (exp))
3468 continue;
3470 /* The shift might have undefined behavior if TEM is true,
3471 but reassociate_bb isn't prepared to have basic blocks
3472 split when it is running. So, temporarily emit a code
3473 with BIT_IOR_EXPR instead of &&, and fix it up in
3474 branch_fixup. */
3475 gimple_seq seq = NULL;
3476 if (tem)
3478 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3479 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3480 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3482 gimple_seq seq2;
3483 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3484 gimple_seq_add_seq_without_update (&seq, seq2);
3485 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3486 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3487 if (tem)
3489 gimple *g = gimple_build_assign (make_ssa_name (optype),
3490 BIT_IOR_EXPR, tem, exp);
3491 gimple_set_location (g, loc);
3492 gimple_seq_add_stmt_without_update (&seq, g);
3493 exp = gimple_assign_lhs (g);
3495 tree val = build_zero_cst (optype);
3496 if (update_range_test (&ranges[i], NULL, candidates.address (),
3497 candidates.length (), opcode, ops, exp,
3498 seq, false, val, val, strict_overflow_p))
3500 any_changes = true;
3501 if (tem)
3502 reassoc_branch_fixups.safe_push (tem);
3504 else
3505 gimple_seq_discard (seq);
3508 return any_changes;
3511 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3512 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
3513 Also, handle x < C && y < C && z < C where C is power of two as
3514 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3515 as (x | y | z) < 0. */
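/* A minimal source-level sketch (the function name is made up for the
   example):

     _Bool
     all_nonzero (int x, int y, int z)
     {
       return x != 0 && y != 0 && z != 0;
     }

   is expected to become a single test of the bitwise IOR, roughly
   (x | y | z) != 0, and analogously for the != -1, < C and < 0 forms
   described above.  */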
3517 static bool
3518 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3519 vec<operand_entry *> *ops,
3520 struct range_entry *ranges)
3522 int i;
3523 unsigned int b;
3524 bool any_changes = false;
3525 auto_vec<int, 128> buckets;
3526 auto_vec<int, 32> chains;
3527 auto_vec<struct range_entry *, 32> candidates;
3529 for (i = first; i < length; i++)
3531 int idx;
3533 if (ranges[i].exp == NULL_TREE
3534 || TREE_CODE (ranges[i].exp) != SSA_NAME
3535 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3536 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3537 continue;
3539 if (ranges[i].low != NULL_TREE
3540 && ranges[i].high != NULL_TREE
3541 && ranges[i].in_p
3542 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3544 idx = !integer_zerop (ranges[i].low);
3545 if (idx && !integer_all_onesp (ranges[i].low))
3546 continue;
3548 else if (ranges[i].high != NULL_TREE
3549 && TREE_CODE (ranges[i].high) == INTEGER_CST
3550 && ranges[i].in_p)
3552 wide_int w = wi::to_wide (ranges[i].high);
3553 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3554 int l = wi::clz (w);
3555 idx = 2;
3556 if (l <= 0
3557 || l >= prec
3558 || w != wi::mask (prec - l, false, prec))
3559 continue;
3560 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3561 && ranges[i].low == NULL_TREE)
3562 || (ranges[i].low
3563 && integer_zerop (ranges[i].low))))
3564 continue;
3566 else if (ranges[i].high == NULL_TREE
3567 && ranges[i].low != NULL_TREE
3568 /* Perform this optimization only in the last
3569 reassoc pass, as it interferes with the reassociation
3570 itself, or could also interfere with VRP etc., which might not
3571 be able to virtually undo the optimization. */
3572 && !reassoc_insert_powi_p
3573 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3574 && integer_zerop (ranges[i].low))
3575 idx = 3;
3576 else
3577 continue;
3579 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3580 if (buckets.length () <= b)
3581 buckets.safe_grow_cleared (b + 1, true);
3582 if (chains.length () <= (unsigned) i)
3583 chains.safe_grow (i + 1, true);
3584 chains[i] = buckets[b];
3585 buckets[b] = i + 1;
3588 FOR_EACH_VEC_ELT (buckets, b, i)
3589 if (i && chains[i - 1])
3591 int j, k = i;
3592 if ((b % 4) == 2)
3594 /* When ranges[X - 1].high + 1 is a power of two,
3595 we need to process the same bucket up to
3596 precision - 1 times, each time splitting the entries
3597 with the same high bound into one chain and the
3598 rest into another one to be processed later. */
3599 int this_prev = i;
3600 int other_prev = 0;
3601 for (j = chains[i - 1]; j; j = chains[j - 1])
3603 if (tree_int_cst_equal (ranges[i - 1].high,
3604 ranges[j - 1].high))
3606 chains[this_prev - 1] = j;
3607 this_prev = j;
3609 else if (other_prev == 0)
3611 buckets[b] = j;
3612 other_prev = j;
3614 else
3616 chains[other_prev - 1] = j;
3617 other_prev = j;
3620 chains[this_prev - 1] = 0;
3621 if (other_prev)
3622 chains[other_prev - 1] = 0;
3623 if (chains[i - 1] == 0)
3625 if (other_prev)
3626 b--;
3627 continue;
3630 for (j = chains[i - 1]; j; j = chains[j - 1])
3632 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3633 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3634 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3635 k = j;
3637 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3638 tree type2 = NULL_TREE;
3639 bool strict_overflow_p = false;
3640 candidates.truncate (0);
3641 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3642 type1 = pointer_sized_int_node;
3643 for (j = i; j; j = chains[j - 1])
3645 tree type = TREE_TYPE (ranges[j - 1].exp);
3646 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3647 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3648 type = pointer_sized_int_node;
3649 if ((b % 4) == 3)
3651 /* For the signed < 0 cases, the types should be
3652 really compatible (all signed with the same precision);
3653 instead, put ranges that have a different in_p from
3654 k first. */
3655 if (!useless_type_conversion_p (type1, type))
3656 continue;
3657 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3658 candidates.safe_push (&ranges[j - 1]);
3659 type2 = type1;
3660 continue;
3662 if (j == k
3663 || useless_type_conversion_p (type1, type))
3665 else if (type2 == NULL_TREE
3666 || useless_type_conversion_p (type2, type))
3668 if (type2 == NULL_TREE)
3669 type2 = type;
3670 candidates.safe_push (&ranges[j - 1]);
3673 unsigned l = candidates.length ();
3674 for (j = i; j; j = chains[j - 1])
3676 tree type = TREE_TYPE (ranges[j - 1].exp);
3677 if (j == k)
3678 continue;
3679 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3680 type = pointer_sized_int_node;
3681 if ((b % 4) == 3)
3683 if (!useless_type_conversion_p (type1, type))
3684 continue;
3685 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3686 candidates.safe_push (&ranges[j - 1]);
3687 continue;
3689 if (useless_type_conversion_p (type1, type))
3691 else if (type2 == NULL_TREE
3692 || useless_type_conversion_p (type2, type))
3693 continue;
3694 candidates.safe_push (&ranges[j - 1]);
3696 gimple_seq seq = NULL;
3697 tree op = NULL_TREE;
3698 unsigned int id;
3699 struct range_entry *r;
3700 candidates.safe_push (&ranges[k - 1]);
3701 FOR_EACH_VEC_ELT (candidates, id, r)
3703 gimple *g;
3704 enum tree_code code;
3705 if (id == 0)
3707 op = r->exp;
3708 continue;
3710 if (id == l
3711 || POINTER_TYPE_P (TREE_TYPE (op))
3712 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3714 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3715 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3716 if (code == BIT_NOT_EXPR
3717 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3719 g = gimple_build_assign (make_ssa_name (type3),
3720 NOP_EXPR, op);
3721 gimple_seq_add_stmt_without_update (&seq, g);
3722 op = gimple_assign_lhs (g);
3724 g = gimple_build_assign (make_ssa_name (type3), code, op);
3725 gimple_seq_add_stmt_without_update (&seq, g);
3726 op = gimple_assign_lhs (g);
3728 tree type = TREE_TYPE (r->exp);
3729 tree exp = r->exp;
3730 if (POINTER_TYPE_P (type)
3731 || TREE_CODE (type) == OFFSET_TYPE
3732 || (id >= l && !useless_type_conversion_p (type1, type)))
3734 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3735 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3736 gimple_seq_add_stmt_without_update (&seq, g);
3737 exp = gimple_assign_lhs (g);
3739 if ((b % 4) == 3)
3740 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3741 else
3742 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3743 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3744 code, op, exp);
3745 gimple_seq_add_stmt_without_update (&seq, g);
3746 op = gimple_assign_lhs (g);
3748 type1 = TREE_TYPE (ranges[k - 1].exp);
3749 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3751 gimple *g
3752 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3753 gimple_seq_add_stmt_without_update (&seq, g);
3754 op = gimple_assign_lhs (g);
3756 candidates.pop ();
3757 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3758 candidates.length (), opcode, ops, op,
3759 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3760 ranges[k - 1].high, strict_overflow_p))
3761 any_changes = true;
3762 else
3763 gimple_seq_discard (seq);
3764 if ((b % 4) == 2 && buckets[b] != i)
3765 /* There is more work to do for this bucket. */
3766 b--;
3769 return any_changes;
3772 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3773 a >= 0 && a < b into (unsigned) a < (unsigned) b
3774 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
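/* A minimal source-level sketch (names made up for the example; the
   masking is only there so that b is provably non-negative):

     _Bool
     in_bound (int a, int c)
     {
       int b = c & 0xff;  // known to be >= 0
       return a >= 0 && a < b;
     }

   The two tests are expected to merge into a single unsigned compare,
   roughly (unsigned) a < (unsigned) b.  */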
3776 static bool
3777 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3778 vec<operand_entry *> *ops,
3779 struct range_entry *ranges,
3780 basic_block first_bb)
3782 int i;
3783 bool any_changes = false;
3784 hash_map<tree, int> *map = NULL;
3786 for (i = first; i < length; i++)
3788 if (ranges[i].exp == NULL_TREE
3789 || TREE_CODE (ranges[i].exp) != SSA_NAME
3790 || !ranges[i].in_p)
3791 continue;
3793 tree type = TREE_TYPE (ranges[i].exp);
3794 if (!INTEGRAL_TYPE_P (type)
3795 || TYPE_UNSIGNED (type)
3796 || ranges[i].low == NULL_TREE
3797 || !integer_zerop (ranges[i].low)
3798 || ranges[i].high != NULL_TREE)
3799 continue;
3800 /* EXP >= 0 here. */
3801 if (map == NULL)
3802 map = new hash_map <tree, int>;
3803 map->put (ranges[i].exp, i);
3806 if (map == NULL)
3807 return false;
3809 for (i = 0; i < length; i++)
3811 bool in_p = ranges[i].in_p;
3812 if (ranges[i].low == NULL_TREE
3813 || ranges[i].high == NULL_TREE)
3814 continue;
3815 if (!integer_zerop (ranges[i].low)
3816 || !integer_zerop (ranges[i].high))
3818 if (ranges[i].exp
3819 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3820 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3821 && integer_onep (ranges[i].low)
3822 && integer_onep (ranges[i].high))
3823 in_p = !in_p;
3824 else
3825 continue;
3828 gimple *stmt;
3829 tree_code ccode;
3830 tree rhs1, rhs2;
3831 if (ranges[i].exp)
3833 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3834 continue;
3835 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3836 if (!is_gimple_assign (stmt))
3837 continue;
3838 ccode = gimple_assign_rhs_code (stmt);
3839 rhs1 = gimple_assign_rhs1 (stmt);
3840 rhs2 = gimple_assign_rhs2 (stmt);
3842 else
3844 operand_entry *oe = (*ops)[ranges[i].idx];
3845 stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3846 if (gimple_code (stmt) != GIMPLE_COND)
3847 continue;
3848 ccode = gimple_cond_code (stmt);
3849 rhs1 = gimple_cond_lhs (stmt);
3850 rhs2 = gimple_cond_rhs (stmt);
3853 if (TREE_CODE (rhs1) != SSA_NAME
3854 || rhs2 == NULL_TREE
3855 || TREE_CODE (rhs2) != SSA_NAME)
3856 continue;
3858 switch (ccode)
3860 case GT_EXPR:
3861 case GE_EXPR:
3862 case LT_EXPR:
3863 case LE_EXPR:
3864 break;
3865 default:
3866 continue;
3868 if (in_p)
3869 ccode = invert_tree_comparison (ccode, false);
3870 switch (ccode)
3872 case GT_EXPR:
3873 case GE_EXPR:
3874 std::swap (rhs1, rhs2);
3875 ccode = swap_tree_comparison (ccode);
3876 break;
3877 case LT_EXPR:
3878 case LE_EXPR:
3879 break;
3880 default:
3881 gcc_unreachable ();
3884 int *idx = map->get (rhs1);
3885 if (idx == NULL)
3886 continue;
3888 /* maybe_optimize_range_tests allows statements without side-effects
3889 in the basic blocks as long as they are consumed in the same bb.
3890 Make sure rhs2's def stmt is not among them, otherwise we can't
3891 safely use get_nonzero_bits on it. E.g. in:
3892 # RANGE [-83, 1] NONZERO 173
3893 # k_32 = PHI <k_47(13), k_12(9)>
3895 if (k_32 >= 0)
3896 goto <bb 5>; [26.46%]
3897 else
3898 goto <bb 9>; [73.54%]
3900 <bb 5> [local count: 140323371]:
3901 # RANGE [0, 1] NONZERO 1
3902 _5 = (int) k_32;
3903 # RANGE [0, 4] NONZERO 4
3904 _21 = _5 << 2;
3905 # RANGE [0, 4] NONZERO 4
3906 iftmp.0_44 = (char) _21;
3907 if (k_32 < iftmp.0_44)
3908 goto <bb 6>; [84.48%]
3909 else
3910 goto <bb 9>; [15.52%]
3911 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3912 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3913 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3914 those stmts even for negative k_32 and the value ranges would no
3915 longer be guaranteed and so the optimization would be invalid. */
3916 while (opcode == ERROR_MARK)
3918 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3919 basic_block bb2 = gimple_bb (g);
3920 if (bb2
3921 && bb2 != first_bb
3922 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3924 /* As an exception, handle a few common cases. */
3925 if (gimple_assign_cast_p (g)
3926 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3928 tree op0 = gimple_assign_rhs1 (g);
3929 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3930 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3931 > TYPE_PRECISION (TREE_TYPE (op0))))
3932 /* Zero-extension is always ok. */
3933 break;
3934 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3935 == TYPE_PRECISION (TREE_TYPE (op0))
3936 && TREE_CODE (op0) == SSA_NAME)
3938 /* Cast from signed to unsigned or vice versa. Retry
3939 with the op0 as new rhs2. */
3940 rhs2 = op0;
3941 continue;
3944 else if (is_gimple_assign (g)
3945 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3946 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3947 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3948 /* Masking with INTEGER_CST with MSB clear is always ok
3949 too. */
3950 break;
3951 rhs2 = NULL_TREE;
3953 break;
3955 if (rhs2 == NULL_TREE)
3956 continue;
3958 wide_int nz = get_nonzero_bits (rhs2);
3959 if (wi::neg_p (nz))
3960 continue;
3962 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3963 and RHS2 is known to be >= 0. */
3964 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3966 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3967 if ((ranges[*idx].strict_overflow_p
3968 || ranges[i].strict_overflow_p)
3969 && issue_strict_overflow_warning (wc))
3970 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3971 "assuming signed overflow does not occur "
3972 "when simplifying range test");
3974 if (dump_file && (dump_flags & TDF_DETAILS))
3976 struct range_entry *r = &ranges[*idx];
3977 fprintf (dump_file, "Optimizing range test ");
3978 print_generic_expr (dump_file, r->exp);
3979 fprintf (dump_file, " +[");
3980 print_generic_expr (dump_file, r->low);
3981 fprintf (dump_file, ", ");
3982 print_generic_expr (dump_file, r->high);
3983 fprintf (dump_file, "] and comparison ");
3984 print_generic_expr (dump_file, rhs1);
3985 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3986 print_generic_expr (dump_file, rhs2);
3987 fprintf (dump_file, "\n into (");
3988 print_generic_expr (dump_file, utype);
3989 fprintf (dump_file, ") ");
3990 print_generic_expr (dump_file, rhs1);
3991 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3992 print_generic_expr (dump_file, utype);
3993 fprintf (dump_file, ") ");
3994 print_generic_expr (dump_file, rhs2);
3995 fprintf (dump_file, "\n");
3998 operand_entry *oe = (*ops)[ranges[i].idx];
3999 ranges[i].in_p = 0;
4000 if (opcode == BIT_IOR_EXPR
4001 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4003 ranges[i].in_p = 1;
4004 ccode = invert_tree_comparison (ccode, false);
4007 unsigned int uid = gimple_uid (stmt);
4008 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4009 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4010 gimple_set_uid (g, uid);
4011 rhs1 = gimple_assign_lhs (g);
4012 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4013 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4015 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4016 gimple_set_uid (g, uid);
4017 rhs2 = gimple_assign_lhs (g);
4018 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4020 if (tree_swap_operands_p (rhs1, rhs2))
4022 std::swap (rhs1, rhs2);
4023 ccode = swap_tree_comparison (ccode);
4025 if (gimple_code (stmt) == GIMPLE_COND)
4027 gcond *c = as_a <gcond *> (stmt);
4028 gimple_cond_set_code (c, ccode);
4029 gimple_cond_set_lhs (c, rhs1);
4030 gimple_cond_set_rhs (c, rhs2);
4031 update_stmt (stmt);
4033 else
4035 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4036 if (!INTEGRAL_TYPE_P (ctype)
4037 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4038 && TYPE_PRECISION (ctype) != 1))
4039 ctype = boolean_type_node;
4040 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4041 gimple_set_uid (g, uid);
4042 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4043 if (oe->op && ctype != TREE_TYPE (oe->op))
4045 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4046 NOP_EXPR, gimple_assign_lhs (g));
4047 gimple_set_uid (g, uid);
4048 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4050 ranges[i].exp = gimple_assign_lhs (g);
4051 oe->op = ranges[i].exp;
4052 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4053 ranges[i].high = ranges[i].low;
4055 ranges[i].strict_overflow_p = false;
4056 oe = (*ops)[ranges[*idx].idx];
4057 /* Now change all the other range test immediate uses, so that
4058 those tests will be optimized away. */
4059 if (opcode == ERROR_MARK)
4061 if (oe->op)
4062 oe->op = build_int_cst (TREE_TYPE (oe->op),
4063 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4064 else
4065 oe->op = (oe->rank == BIT_IOR_EXPR
4066 ? boolean_false_node : boolean_true_node);
4068 else
4069 oe->op = error_mark_node;
4070 ranges[*idx].exp = NULL_TREE;
4071 ranges[*idx].low = NULL_TREE;
4072 ranges[*idx].high = NULL_TREE;
4073 any_changes = true;
4076 delete map;
4077 return any_changes;
4080 /* Optimize range tests, similarly to how fold_range_test optimizes
4081 it on trees. The tree code for the binary
4082 operation between all the operands is OPCODE.
4083 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4084 maybe_optimize_range_tests for inter-bb range optimization.
4085 In that case if oe->op is NULL, oe->id is the index of the bb whose
4086 GIMPLE_COND is && or ||ed into the test, and oe->rank says
4087 the actual opcode.
4088 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
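/* A purely illustrative source-level example of the merging attempted
   here (hypothetical values):

     if (a < 0 || a > 9)

   Both comparisons test the same SSA_NAME, their ranges can be merged
   by merge_ranges, and update_range_test can then emit one check that
   is roughly equivalent to

     if ((unsigned) a > 9)
*/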
4090 static bool
4091 optimize_range_tests (enum tree_code opcode,
4092 vec<operand_entry *> *ops, basic_block first_bb)
4094 unsigned int length = ops->length (), i, j, first;
4095 operand_entry *oe;
4096 struct range_entry *ranges;
4097 bool any_changes = false;
4099 if (length == 1)
4100 return false;
4102 ranges = XNEWVEC (struct range_entry, length);
4103 for (i = 0; i < length; i++)
4105 oe = (*ops)[i];
4106 ranges[i].idx = i;
4107 init_range_entry (ranges + i, oe->op,
4108 oe->op
4109 ? NULL
4110 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4111 /* For | invert it now, we will invert it again before emitting
4112 the optimized expression. */
4113 if (opcode == BIT_IOR_EXPR
4114 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4115 ranges[i].in_p = !ranges[i].in_p;
4118 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4119 for (i = 0; i < length; i++)
4120 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4121 break;
4123 /* Try to merge ranges. */
4124 for (first = i; i < length; i++)
4126 tree low = ranges[i].low;
4127 tree high = ranges[i].high;
4128 int in_p = ranges[i].in_p;
4129 bool strict_overflow_p = ranges[i].strict_overflow_p;
4130 int update_fail_count = 0;
4132 for (j = i + 1; j < length; j++)
4134 if (ranges[i].exp != ranges[j].exp)
4135 break;
4136 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4137 ranges[j].in_p, ranges[j].low, ranges[j].high))
4138 break;
4139 strict_overflow_p |= ranges[j].strict_overflow_p;
4142 if (j == i + 1)
4143 continue;
4145 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4146 opcode, ops, ranges[i].exp, NULL, in_p,
4147 low, high, strict_overflow_p))
4149 i = j - 1;
4150 any_changes = true;
4152 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4153 while update_range_test would fail. */
4154 else if (update_fail_count == 64)
4155 i = j - 1;
4156 else
4157 ++update_fail_count;
4160 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4161 ops, ranges);
4163 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4164 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4165 ops, ranges);
4166 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4167 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4168 ops, ranges);
4169 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4170 ranges, first_bb);
4171 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4172 ops, ranges);
4174 if (any_changes && opcode != ERROR_MARK)
4176 j = 0;
4177 FOR_EACH_VEC_ELT (*ops, i, oe)
4179 if (oe->op == error_mark_node)
4180 continue;
4181 else if (i != j)
4182 (*ops)[j] = oe;
4183 j++;
4185 ops->truncate (j);
4188 XDELETEVEC (ranges);
4189 return any_changes;
4192 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4193 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4194 otherwise the comparison code. TYPE is a return value that is set
4195 to the type of the comparison. */
4197 static tree_code
4198 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4199 tree *lhs, tree *rhs, gassign **vcond)
4201 if (TREE_CODE (var) != SSA_NAME)
4202 return ERROR_MARK;
4204 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4205 if (stmt == NULL)
4206 return ERROR_MARK;
4207 if (vcond)
4208 *vcond = stmt;
4210 /* ??? If we start creating more COND_EXPR, we could perform
4211 this same optimization with them. For now, simplify. */
4212 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4213 return ERROR_MARK;
4215 tree cond = gimple_assign_rhs1 (stmt);
4216 tree_code cmp = TREE_CODE (cond);
4217 if (cmp != SSA_NAME)
4218 return ERROR_MARK;
4220 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4221 if (assign == NULL
4222 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4223 return ERROR_MARK;
4225 cmp = gimple_assign_rhs_code (assign);
4226 if (lhs)
4227 *lhs = gimple_assign_rhs1 (assign);
4228 if (rhs)
4229 *rhs = gimple_assign_rhs2 (assign);
4231 /* ??? For now, allow only canonical true and false result vectors.
4232 We could expand this to other constants should the need arise,
4233 but at the moment we don't create them. */
4234 tree t = gimple_assign_rhs2 (stmt);
4235 tree f = gimple_assign_rhs3 (stmt);
4236 bool inv;
4237 if (integer_all_onesp (t))
4238 inv = false;
4239 else if (integer_all_onesp (f))
4241 cmp = invert_tree_comparison (cmp, false);
4242 inv = true;
4244 else
4245 return ERROR_MARK;
4246 if (!integer_zerop (f))
4247 return ERROR_MARK;
4249 /* Success! */
4250 if (rets)
4251 *rets = assign;
4252 if (reti)
4253 *reti = inv;
4254 if (type)
4255 *type = TREE_TYPE (cond);
4256 return cmp;
4259 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4260 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
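/* Illustrative sketch (made-up GIMPLE): given two operands that are
   VEC_COND_EXPRs with canonical -1/0 result vectors, e.g.

     c1_1 = x_2 <= y_3;
     v1_4 = VEC_COND_EXPR <c1_1, { -1, ... }, { 0, ... }>;
     c2_5 = x_2 >= y_3;
     v2_6 = VEC_COND_EXPR <c2_5, { -1, ... }, { 0, ... }>;
     _7 = v1_4 & v2_6;

   the two comparisons may be combined by maybe_fold_and_comparisons
   into a single condition (here x_2 == y_3) feeding one VEC_COND_EXPR,
   and the second operand is dropped from the ops vector.  */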
4262 static bool
4263 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4265 unsigned int length = ops->length (), i, j;
4266 bool any_changes = false;
4268 if (length == 1)
4269 return false;
4271 for (i = 0; i < length; ++i)
4273 tree elt0 = (*ops)[i]->op;
4275 gassign *stmt0, *vcond0;
4276 bool invert;
4277 tree type, lhs0, rhs0;
4278 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4279 &rhs0, &vcond0);
4280 if (cmp0 == ERROR_MARK)
4281 continue;
4283 for (j = i + 1; j < length; ++j)
4285 tree &elt1 = (*ops)[j]->op;
4287 gassign *stmt1, *vcond1;
4288 tree lhs1, rhs1;
4289 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4290 &rhs1, &vcond1);
4291 if (cmp1 == ERROR_MARK)
4292 continue;
4294 tree comb;
4295 if (opcode == BIT_AND_EXPR)
4296 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4297 cmp1, lhs1, rhs1);
4298 else if (opcode == BIT_IOR_EXPR)
4299 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4300 cmp1, lhs1, rhs1);
4301 else
4302 gcc_unreachable ();
4303 if (comb == NULL)
4304 continue;
4306 /* Success! */
4307 if (dump_file && (dump_flags & TDF_DETAILS))
4309 fprintf (dump_file, "Transforming ");
4310 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4311 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4312 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4313 fprintf (dump_file, " into ");
4314 print_generic_expr (dump_file, comb);
4315 fputc ('\n', dump_file);
4318 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4319 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4320 true, GSI_SAME_STMT);
4321 if (invert)
4322 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4323 gimple_assign_rhs3_ptr (vcond0));
4324 gimple_assign_set_rhs1 (vcond0, exp);
4325 update_stmt (vcond0);
4327 elt1 = error_mark_node;
4328 any_changes = true;
4332 if (any_changes)
4334 operand_entry *oe;
4335 j = 0;
4336 FOR_EACH_VEC_ELT (*ops, i, oe)
4338 if (oe->op == error_mark_node)
4339 continue;
4340 else if (i != j)
4341 (*ops)[j] = oe;
4342 j++;
4344 ops->truncate (j);
4347 return any_changes;
4350 /* Return true if STMT is a cast like:
4351 <bb N>:
4353 _123 = (int) _234;
4355 <bb M>:
4356 # _345 = PHI <_123(N), 1(...), 1(...)>
4357 where _234 has bool type, _123 has a single use and
4358 bb N has a single successor M. This is commonly used in
4359 the last block of a range test.
4361 Also return true if STMT is tcc_compare like:
4362 <bb N>:
4364 _234 = a_2(D) == 2;
4366 <bb M>:
4367 # _345 = PHI <_234(N), 1(...), 1(...)>
4368 _346 = (int) _345;
4369 where _234 has bool type, a single use and
4370 bb N has a single successor M. This is commonly used in
4371 the last block of a range test. */
4373 static bool
4374 final_range_test_p (gimple *stmt)
4376 basic_block bb, rhs_bb, lhs_bb;
4377 edge e;
4378 tree lhs, rhs;
4379 use_operand_p use_p;
4380 gimple *use_stmt;
4382 if (!gimple_assign_cast_p (stmt)
4383 && (!is_gimple_assign (stmt)
4384 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4385 != tcc_comparison)))
4386 return false;
4387 bb = gimple_bb (stmt);
4388 if (!single_succ_p (bb))
4389 return false;
4390 e = single_succ_edge (bb);
4391 if (e->flags & EDGE_COMPLEX)
4392 return false;
4394 lhs = gimple_assign_lhs (stmt);
4395 rhs = gimple_assign_rhs1 (stmt);
4396 if (gimple_assign_cast_p (stmt)
4397 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4398 || TREE_CODE (rhs) != SSA_NAME
4399 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4400 return false;
4402 if (!gimple_assign_cast_p (stmt)
4403 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4404 return false;
4406 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4407 if (!single_imm_use (lhs, &use_p, &use_stmt))
4408 return false;
4410 if (gimple_code (use_stmt) != GIMPLE_PHI
4411 || gimple_bb (use_stmt) != e->dest)
4412 return false;
4414 /* And that the rhs is defined in the same loop. */
4415 if (gimple_assign_cast_p (stmt))
4417 if (TREE_CODE (rhs) != SSA_NAME
4418 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4419 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4420 return false;
4422 else
4424 if (TREE_CODE (lhs) != SSA_NAME
4425 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4426 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4427 return false;
4430 return true;
4433 /* Return true if BB is suitable basic block for inter-bb range test
4434 optimization. If BACKWARD is true, BB should be the only predecessor
4435 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4436 or compared with to find a common basic block to which all conditions
4437 branch if true resp. false. If BACKWARD is false, TEST_BB should
4438 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4439 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4440 block points to an empty block that falls through into *OTHER_BB and
4441 the phi args match that path. */
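/* A small illustrative CFG (block numbers made up):

     <bb 2>: if (x_1 == 3) goto <bb 9>; else goto <bb 3>;
     <bb 3>: if (x_1 == 5) goto <bb 9>; else goto <bb 4>;

   Here suitable_cond_bb (<bb 2>, <bb 3>, &other_bb, NULL, true) can
   succeed with *OTHER_BB set to <bb 9>, provided the PHI arguments in
   <bb 9> coming from <bb 2> and <bb 3> agree.  */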
4443 static bool
4444 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4445 bool *test_swapped_p, bool backward)
4447 edge_iterator ei, ei2;
4448 edge e, e2;
4449 gimple *stmt;
4450 gphi_iterator gsi;
4451 bool other_edge_seen = false;
4452 bool is_cond;
4454 if (test_bb == bb)
4455 return false;
4456 /* Check last stmt first. */
4457 stmt = last_nondebug_stmt (bb);
4458 if (stmt == NULL
4459 || (gimple_code (stmt) != GIMPLE_COND
4460 && (backward || !final_range_test_p (stmt)))
4461 || gimple_visited_p (stmt)
4462 || stmt_could_throw_p (cfun, stmt)
4463 || *other_bb == bb)
4464 return false;
4465 is_cond = gimple_code (stmt) == GIMPLE_COND;
4466 if (is_cond)
4468 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4469 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4470 to *OTHER_BB (if not set yet, try to find it out). */
4471 if (EDGE_COUNT (bb->succs) != 2)
4472 return false;
4473 FOR_EACH_EDGE (e, ei, bb->succs)
4475 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4476 return false;
4477 if (e->dest == test_bb)
4479 if (backward)
4480 continue;
4481 else
4482 return false;
4484 if (e->dest == bb)
4485 return false;
4486 if (*other_bb == NULL)
4488 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4489 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4490 return false;
4491 else if (e->dest == e2->dest)
4492 *other_bb = e->dest;
4493 if (*other_bb == NULL)
4494 return false;
4496 if (e->dest == *other_bb)
4497 other_edge_seen = true;
4498 else if (backward)
4499 return false;
4501 if (*other_bb == NULL || !other_edge_seen)
4502 return false;
4504 else if (single_succ (bb) != *other_bb)
4505 return false;
4507 /* Now check all PHIs of *OTHER_BB. */
4508 e = find_edge (bb, *other_bb);
4509 e2 = find_edge (test_bb, *other_bb);
4510 retry:;
4511 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4513 gphi *phi = gsi.phi ();
4514 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4515 corresponding to BB and TEST_BB predecessor must be the same. */
4516 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4517 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4519 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4520 one of the PHIs should have the lhs of the last stmt in
4521 that block as PHI arg and that PHI should have 0 or 1
4522 corresponding to it in all other range test basic blocks
4523 considered. */
4524 if (!is_cond)
4526 if (gimple_phi_arg_def (phi, e->dest_idx)
4527 == gimple_assign_lhs (stmt)
4528 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4529 || integer_onep (gimple_phi_arg_def (phi,
4530 e2->dest_idx))))
4531 continue;
4533 else
4535 gimple *test_last = last_nondebug_stmt (test_bb);
4536 if (gimple_code (test_last) == GIMPLE_COND)
4538 if (backward ? e2->src != test_bb : e->src != bb)
4539 return false;
4541 /* For last_bb, handle also:
4542 if (x_3(D) == 3)
4543 goto <bb 6>; [34.00%]
4544 else
4545 goto <bb 7>; [66.00%]
4547 <bb 6> [local count: 79512730]:
4549 <bb 7> [local count: 1073741824]:
4550 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4551 where bb 7 is *OTHER_BB, but the PHI values from the
4552 earlier bbs match the path through the empty bb
4553 in between. */
4554 edge e3;
4555 if (backward)
4556 e3 = EDGE_SUCC (test_bb,
4557 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4558 else
4559 e3 = EDGE_SUCC (bb,
4560 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4561 if (empty_block_p (e3->dest)
4562 && single_succ_p (e3->dest)
4563 && single_succ (e3->dest) == *other_bb
4564 && single_pred_p (e3->dest)
4565 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4567 if (backward)
4568 e2 = single_succ_edge (e3->dest);
4569 else
4570 e = single_succ_edge (e3->dest);
4571 if (test_swapped_p)
4572 *test_swapped_p = true;
4573 goto retry;
4576 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4577 == gimple_assign_lhs (test_last)
4578 && (integer_zerop (gimple_phi_arg_def (phi,
4579 e->dest_idx))
4580 || integer_onep (gimple_phi_arg_def (phi,
4581 e->dest_idx))))
4582 continue;
4585 return false;
4588 return true;
4591 /* Return true if BB doesn't have side-effects that would disallow
4592 range test optimization, all SSA_NAMEs set in the bb are consumed
4593 in the bb and there are no PHIs. */
4595 bool
4596 no_side_effect_bb (basic_block bb)
4598 gimple_stmt_iterator gsi;
4599 gimple *last;
4601 if (!gimple_seq_empty_p (phi_nodes (bb)))
4602 return false;
4603 last = last_nondebug_stmt (bb);
4604 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4606 gimple *stmt = gsi_stmt (gsi);
4607 tree lhs;
4608 imm_use_iterator imm_iter;
4609 use_operand_p use_p;
4611 if (is_gimple_debug (stmt))
4612 continue;
4613 if (gimple_has_side_effects (stmt))
4614 return false;
4615 if (stmt == last)
4616 return true;
4617 if (!is_gimple_assign (stmt))
4618 return false;
4619 lhs = gimple_assign_lhs (stmt);
4620 if (TREE_CODE (lhs) != SSA_NAME)
4621 return false;
4622 if (gimple_assign_rhs_could_trap_p (stmt))
4623 return false;
4624 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4626 gimple *use_stmt = USE_STMT (use_p);
4627 if (is_gimple_debug (use_stmt))
4628 continue;
4629 if (gimple_bb (use_stmt) != bb)
4630 return false;
4633 return false;
4636 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4637 return true and fill in *OPS recursively. */
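/* Illustrative sketch (made-up SSA names): for

     _5 = _1 | _2;
     _6 = _5 | _3;

   get_ops (_6, BIT_IOR_EXPR, &ops, loop) would recurse through _5 and
   push the leaf operands _1, _2 and _3 into OPS, assuming each
   intermediate result has a single use and is reassociable.  */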
4639 static bool
4640 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4641 class loop *loop)
4643 gimple *stmt = SSA_NAME_DEF_STMT (var);
4644 tree rhs[2];
4645 int i;
4647 if (!is_reassociable_op (stmt, code, loop))
4648 return false;
4650 rhs[0] = gimple_assign_rhs1 (stmt);
4651 rhs[1] = gimple_assign_rhs2 (stmt);
4652 gimple_set_visited (stmt, true);
4653 for (i = 0; i < 2; i++)
4654 if (TREE_CODE (rhs[i]) == SSA_NAME
4655 && !get_ops (rhs[i], code, ops, loop)
4656 && has_single_use (rhs[i]))
4658 operand_entry *oe = operand_entry_pool.allocate ();
4660 oe->op = rhs[i];
4661 oe->rank = code;
4662 oe->id = 0;
4663 oe->count = 1;
4664 oe->stmt_to_insert = NULL;
4665 ops->safe_push (oe);
4667 return true;
4670 /* Find the ops that were added by get_ops starting from VAR, see if
4671 they were changed during update_range_test and if yes, create new
4672 stmts. */
4674 static tree
4675 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4676 unsigned int *pidx, class loop *loop)
4678 gimple *stmt = SSA_NAME_DEF_STMT (var);
4679 tree rhs[4];
4680 int i;
4682 if (!is_reassociable_op (stmt, code, loop))
4683 return NULL;
4685 rhs[0] = gimple_assign_rhs1 (stmt);
4686 rhs[1] = gimple_assign_rhs2 (stmt);
4687 rhs[2] = rhs[0];
4688 rhs[3] = rhs[1];
4689 for (i = 0; i < 2; i++)
4690 if (TREE_CODE (rhs[i]) == SSA_NAME)
4692 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4693 if (rhs[2 + i] == NULL_TREE)
4695 if (has_single_use (rhs[i]))
4696 rhs[2 + i] = ops[(*pidx)++]->op;
4697 else
4698 rhs[2 + i] = rhs[i];
4701 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4702 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4704 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4705 var = make_ssa_name (TREE_TYPE (var));
4706 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4707 rhs[2], rhs[3]);
4708 gimple_set_uid (g, gimple_uid (stmt));
4709 gimple_set_visited (g, true);
4710 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4711 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4712 if (fold_stmt_inplace (&gsi2))
4713 update_stmt (g);
4715 return var;
4718 /* Structure to track the initial value passed to get_ops and
4719 the range in the ops vector for each basic block. */
4721 struct inter_bb_range_test_entry
4723 tree op;
4724 unsigned int first_idx, last_idx;
4727 /* Inter-bb range test optimization.
4729 Returns TRUE if a gimple conditional is optimized into a constant true/false,
4730 otherwise returns FALSE.
4732 This indicates to the caller that it should run a CFG cleanup pass
4733 once reassociation is completed. */
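/* As a rough source-level illustration of what this catches
   (hypothetical code):

     if (x == 3) goto out;
     if (x == 5) goto out;
     if (x == 7) goto out;

   Each condition lives in its own basic block, all of them branch to
   the same destination, and the range test machinery above can merge
   them (e.g. into a bit test), after which some of the GIMPLE_CONDs
   collapse to constant true/false and a CFG cleanup is requested.  */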
4735 static bool
4736 maybe_optimize_range_tests (gimple *stmt)
4738 basic_block first_bb = gimple_bb (stmt);
4739 basic_block last_bb = first_bb;
4740 basic_block other_bb = NULL;
4741 basic_block bb;
4742 edge_iterator ei;
4743 edge e;
4744 auto_vec<operand_entry *> ops;
4745 auto_vec<inter_bb_range_test_entry> bbinfo;
4746 bool any_changes = false;
4747 bool cfg_cleanup_needed = false;
4749 /* Consider only basic blocks that end with GIMPLE_COND or
4750 a cast statement satisfying final_range_test_p. All
4751 but the last bb in the first_bb .. last_bb range
4752 should end with GIMPLE_COND. */
4753 if (gimple_code (stmt) == GIMPLE_COND)
4755 if (EDGE_COUNT (first_bb->succs) != 2)
4756 return cfg_cleanup_needed;
4758 else if (final_range_test_p (stmt))
4759 other_bb = single_succ (first_bb);
4760 else
4761 return cfg_cleanup_needed;
4763 if (stmt_could_throw_p (cfun, stmt))
4764 return cfg_cleanup_needed;
4766 /* As relative ordering of post-dominator sons isn't fixed,
4767 maybe_optimize_range_tests can be called first on any
4768 bb in the range we want to optimize. So, start searching
4769 backwards, to see if first_bb can be set to a predecessor. */
4770 while (single_pred_p (first_bb))
4772 basic_block pred_bb = single_pred (first_bb);
4773 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4774 break;
4775 if (!no_side_effect_bb (first_bb))
4776 break;
4777 first_bb = pred_bb;
4779 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4780 Before starting forward search in last_bb successors, find
4781 out the other_bb. */
4782 if (first_bb == last_bb)
4784 other_bb = NULL;
4785 /* As non-GIMPLE_COND last stmt always terminates the range,
4786 if forward search didn't discover anything, just give up. */
4787 if (gimple_code (stmt) != GIMPLE_COND)
4788 return cfg_cleanup_needed;
4789 /* Look at both successors. Either it ends with a GIMPLE_COND
4790 and satisfies suitable_cond_bb, or ends with a cast and
4791 other_bb is that cast's successor. */
4792 FOR_EACH_EDGE (e, ei, first_bb->succs)
4793 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4794 || e->dest == first_bb)
4795 return cfg_cleanup_needed;
4796 else if (single_pred_p (e->dest))
4798 stmt = last_nondebug_stmt (e->dest);
4799 if (stmt
4800 && gimple_code (stmt) == GIMPLE_COND
4801 && EDGE_COUNT (e->dest->succs) == 2)
4803 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4804 NULL, true))
4805 break;
4806 else
4807 other_bb = NULL;
4809 else if (stmt
4810 && final_range_test_p (stmt)
4811 && find_edge (first_bb, single_succ (e->dest)))
4813 other_bb = single_succ (e->dest);
4814 if (other_bb == first_bb)
4815 other_bb = NULL;
4818 if (other_bb == NULL)
4819 return cfg_cleanup_needed;
4821 /* Now do the forward search, moving last_bb to successor bbs
4822 that aren't other_bb. */
4823 while (EDGE_COUNT (last_bb->succs) == 2)
4825 FOR_EACH_EDGE (e, ei, last_bb->succs)
4826 if (e->dest != other_bb)
4827 break;
4828 if (e == NULL)
4829 break;
4830 if (!single_pred_p (e->dest))
4831 break;
4832 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4833 break;
4834 if (!no_side_effect_bb (e->dest))
4835 break;
4836 last_bb = e->dest;
4838 if (first_bb == last_bb)
4839 return cfg_cleanup_needed;
4840 /* Here basic blocks first_bb through last_bb's predecessor
4841 end with GIMPLE_COND, all of them have one of the edges to
4842 other_bb and another to another block in the range,
4843 all blocks except first_bb don't have side-effects and
4844 last_bb ends with either GIMPLE_COND, or cast satisfying
4845 final_range_test_p. */
4846 for (bb = last_bb; ; bb = single_pred (bb))
4848 enum tree_code code;
4849 tree lhs, rhs;
4850 inter_bb_range_test_entry bb_ent;
4852 bb_ent.op = NULL_TREE;
4853 bb_ent.first_idx = ops.length ();
4854 bb_ent.last_idx = bb_ent.first_idx;
4855 e = find_edge (bb, other_bb);
4856 stmt = last_nondebug_stmt (bb);
4857 gimple_set_visited (stmt, true);
4858 if (gimple_code (stmt) != GIMPLE_COND)
4860 use_operand_p use_p;
4861 gimple *phi;
4862 edge e2;
4863 unsigned int d;
4865 lhs = gimple_assign_lhs (stmt);
4866 rhs = gimple_assign_rhs1 (stmt);
4867 gcc_assert (bb == last_bb);
4869 /* stmt is
4870 _123 = (int) _234;
4872 _234 = a_2(D) == 2;
4874 followed by:
4875 <bb M>:
4876 # _345 = PHI <_123(N), 1(...), 1(...)>
4878 or 0 instead of 1. If it is 0, the _234
4879 range test is anded together with all the
4880 other range tests, if it is 1, it is ored with
4881 them. */
4882 single_imm_use (lhs, &use_p, &phi);
4883 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4884 e2 = find_edge (first_bb, other_bb);
4885 d = e2->dest_idx;
4886 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4887 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4888 code = BIT_AND_EXPR;
4889 else
4891 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4892 code = BIT_IOR_EXPR;
4895 /* If _234 SSA_NAME_DEF_STMT is
4896 _234 = _567 | _789;
4897 (or &, corresponding to 1/0 in the phi arguments),
4898 push into ops the individual range test arguments
4899 of the bitwise or resp. and, recursively. */
4900 if (TREE_CODE (rhs) == SSA_NAME
4901 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4902 != tcc_comparison)
4903 && !get_ops (rhs, code, &ops,
4904 loop_containing_stmt (stmt))
4905 && has_single_use (rhs))
4907 /* Otherwise, push the _234 range test itself. */
4908 operand_entry *oe = operand_entry_pool.allocate ();
4910 oe->op = rhs;
4911 oe->rank = code;
4912 oe->id = 0;
4913 oe->count = 1;
4914 oe->stmt_to_insert = NULL;
4915 ops.safe_push (oe);
4916 bb_ent.last_idx++;
4917 bb_ent.op = rhs;
4919 else if (is_gimple_assign (stmt)
4920 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4921 == tcc_comparison)
4922 && !get_ops (lhs, code, &ops,
4923 loop_containing_stmt (stmt))
4924 && has_single_use (lhs))
4926 operand_entry *oe = operand_entry_pool.allocate ();
4927 oe->op = lhs;
4928 oe->rank = code;
4929 oe->id = 0;
4930 oe->count = 1;
4931 ops.safe_push (oe);
4932 bb_ent.last_idx++;
4933 bb_ent.op = lhs;
4935 else
4937 bb_ent.last_idx = ops.length ();
4938 bb_ent.op = rhs;
4940 bbinfo.safe_push (bb_ent);
4941 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4942 ops[i]->id = bb->index;
4943 continue;
4945 else if (bb == last_bb)
4947 /* For last_bb, handle also:
4948 if (x_3(D) == 3)
4949 goto <bb 6>; [34.00%]
4950 else
4951 goto <bb 7>; [66.00%]
4953 <bb 6> [local count: 79512730]:
4955 <bb 7> [local count: 1073741824]:
4956 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4957 where bb 7 is OTHER_BB, but the PHI values from the
4958 earlier bbs match the path through the empty bb
4959 in between. */
4960 bool test_swapped_p = false;
4961 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4962 &other_bb, &test_swapped_p, true);
4963 gcc_assert (ok);
4964 if (test_swapped_p)
4965 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4967 /* Otherwise stmt is GIMPLE_COND. */
4968 code = gimple_cond_code (stmt);
4969 lhs = gimple_cond_lhs (stmt);
4970 rhs = gimple_cond_rhs (stmt);
4971 if (TREE_CODE (lhs) == SSA_NAME
4972 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4973 && ((code != EQ_EXPR && code != NE_EXPR)
4974 || rhs != boolean_false_node
4975 /* Either push into ops the individual bitwise
4976 or resp. and operands, depending on which
4977 edge is other_bb. */
4978 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4979 ^ (code == EQ_EXPR))
4980 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4981 loop_containing_stmt (stmt))))
4983 /* Or push the GIMPLE_COND stmt itself. */
4984 operand_entry *oe = operand_entry_pool.allocate ();
4986 oe->op = NULL;
4987 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4988 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4989 /* oe->op = NULL signals that there is no SSA_NAME
4990 for the range test, and oe->id instead is the
4991 basic block number at whose end the GIMPLE_COND
4992 is. */
4993 oe->id = bb->index;
4994 oe->count = 1;
4995 oe->stmt_to_insert = NULL;
4996 ops.safe_push (oe);
4997 bb_ent.op = NULL;
4998 bb_ent.last_idx++;
5000 else if (ops.length () > bb_ent.first_idx)
5002 bb_ent.op = lhs;
5003 bb_ent.last_idx = ops.length ();
5005 bbinfo.safe_push (bb_ent);
5006 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
5007 ops[i]->id = bb->index;
5008 if (bb == first_bb)
5009 break;
5011 if (ops.length () > 1)
5012 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5013 if (any_changes)
5015 unsigned int idx, max_idx = 0;
5016 /* update_ops relies on has_single_use predicates returning the
5017 same values as they did during get_ops earlier. Additionally it
5018 never removes statements, only adds new ones and it should walk
5019 from the single imm use and check the predicate already before
5020 making those changes.
5021 On the other hand, the handling of GIMPLE_COND directly can turn
5022 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5023 it needs to be done in a separate loop afterwards. */
5024 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5026 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5027 && bbinfo[idx].op != NULL_TREE)
5029 tree new_op;
5031 max_idx = idx;
5032 stmt = last_nondebug_stmt (bb);
5033 new_op = update_ops (bbinfo[idx].op,
5034 (enum tree_code)
5035 ops[bbinfo[idx].first_idx]->rank,
5036 ops, &bbinfo[idx].first_idx,
5037 loop_containing_stmt (stmt));
5038 if (new_op == NULL_TREE)
5040 gcc_assert (bb == last_bb);
5041 new_op = ops[bbinfo[idx].first_idx++]->op;
5043 if (bbinfo[idx].op != new_op)
5045 imm_use_iterator iter;
5046 use_operand_p use_p;
5047 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5049 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5050 if (is_gimple_debug (use_stmt))
5051 continue;
5052 else if (gimple_code (use_stmt) == GIMPLE_COND
5053 || gimple_code (use_stmt) == GIMPLE_PHI)
5054 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5055 SET_USE (use_p, new_op);
5056 else if ((is_gimple_assign (use_stmt)
5057 && (TREE_CODE_CLASS
5058 (gimple_assign_rhs_code (use_stmt))
5059 == tcc_comparison)))
5060 cast_or_tcc_cmp_stmt = use_stmt;
5061 else if (gimple_assign_cast_p (use_stmt))
5062 cast_or_tcc_cmp_stmt = use_stmt;
5063 else
5064 gcc_unreachable ();
5066 if (cast_or_tcc_cmp_stmt)
5068 gcc_assert (bb == last_bb);
5069 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5070 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5071 enum tree_code rhs_code
5072 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5073 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5074 : CONVERT_EXPR;
5075 gassign *g;
5076 if (is_gimple_min_invariant (new_op))
5078 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5079 g = gimple_build_assign (new_lhs, new_op);
5081 else
5082 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5083 gimple_stmt_iterator gsi
5084 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5085 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5086 gimple_set_visited (g, true);
5087 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5088 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5089 if (is_gimple_debug (use_stmt))
5090 continue;
5091 else if (gimple_code (use_stmt) == GIMPLE_COND
5092 || gimple_code (use_stmt) == GIMPLE_PHI)
5093 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5094 SET_USE (use_p, new_lhs);
5095 else
5096 gcc_unreachable ();
5100 if (bb == first_bb)
5101 break;
5103 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5105 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5106 && bbinfo[idx].op == NULL_TREE
5107 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5109 gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5111 if (idx > max_idx)
5112 max_idx = idx;
5114 /* If we collapse the conditional to a true/false
5115 condition, then bubble that knowledge up to our caller. */
5116 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5118 gimple_cond_make_false (cond_stmt);
5119 cfg_cleanup_needed = true;
5121 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5123 gimple_cond_make_true (cond_stmt);
5124 cfg_cleanup_needed = true;
5126 else
5128 gimple_cond_set_code (cond_stmt, NE_EXPR);
5129 gimple_cond_set_lhs (cond_stmt,
5130 ops[bbinfo[idx].first_idx]->op);
5131 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5133 update_stmt (cond_stmt);
5135 if (bb == first_bb)
5136 break;
5139 /* The above changes could result in basic blocks after the first
5140 modified one, up to and including last_bb, being executed even if
5141 they would not be in the original program. If the value ranges of
5142 assignment lhs' in those bbs were dependent on the conditions
5143 guarding those basic blocks which now can change, the VRs might
5144 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5145 are only used within the same bb, it should not be a big deal if
5146 we just reset all the VRs in those bbs. See PR68671. */
5147 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5148 reset_flow_sensitive_info_in_bb (bb);
5150 return cfg_cleanup_needed;
5153 /* Remove def stmt of VAR if VAR has zero uses and recurse
5154 on rhs1 operand if so. */
5156 static void
5157 remove_visited_stmt_chain (tree var)
5159 gimple *stmt;
5160 gimple_stmt_iterator gsi;
5162 while (1)
5164 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5165 return;
5166 stmt = SSA_NAME_DEF_STMT (var);
5167 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5169 var = gimple_assign_rhs1 (stmt);
5170 gsi = gsi_for_stmt (stmt);
5171 reassoc_remove_stmt (&gsi);
5172 release_defs (stmt);
5174 else
5175 return;
5179 /* This function checks three consecutive operands in
5180 the passed operands vector OPS starting from OPINDEX and
5181 swaps two operands if it is profitable for the binary operation
5182 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5184 We pair ops with the same rank if possible. */
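/* Illustrative sketch (made-up ranks): with OPS[OPINDEX..OPINDEX+2]
   having ranks {3, 3, 1}, the first and third entries are swapped so
   that the binary statement consuming the last two operands pairs the
   two rank-3 entries, which gives the redundancy eliminator a better
   chance later on.  */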
5186 static void
5187 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5188 unsigned int opindex)
5190 operand_entry *oe1, *oe2, *oe3;
5192 oe1 = ops[opindex];
5193 oe2 = ops[opindex + 1];
5194 oe3 = ops[opindex + 2];
5196 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5197 std::swap (*oe1, *oe3);
5198 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5199 std::swap (*oe1, *oe2);
5202 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5203 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5204 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5205 after the returned stmt. */
5207 static inline gimple *
5208 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5210 insert_before = true;
5211 if (TREE_CODE (rhs1) == SSA_NAME
5212 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5214 stmt = SSA_NAME_DEF_STMT (rhs1);
5215 insert_before = false;
5217 if (TREE_CODE (rhs2) == SSA_NAME
5218 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5220 stmt = SSA_NAME_DEF_STMT (rhs2);
5221 insert_before = false;
5223 return stmt;
5226 /* If the stmt that defines operand has to be inserted, insert it
5227 before the use. */
5228 static void
5229 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5231 gcc_assert (is_gimple_assign (stmt_to_insert));
5232 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5233 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5234 bool insert_before;
5235 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5236 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5237 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5239 /* If the insert point is not stmt, then insert_point would be
5240 the point where operand rhs1 or rhs2 is defined. In this case,
5241 stmt_to_insert has to be inserted afterwards. This would
5242 only happen when the stmt insertion point is flexible. */
5243 if (insert_before)
5244 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5245 else
5246 insert_stmt_after (stmt_to_insert, insert_point);
5250 /* Recursively rewrite our linearized statements so that the operators
5251 match those in OPS[OPINDEX], putting the computation in rank
5252 order. Return new lhs.
5253 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5254 the current stmt and during recursive invocations.
5255 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5256 recursive invocations. */
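/* A purely illustrative trace (made-up names): for the left-linearized
   chain

     t1 = a + b;
     t2 = t1 + c;
     t3 = t2 + d;

   and OPS = { d, c, b, a }, the call on T3 with OPINDEX 0 sets T3's
   rhs2 to d, recurses into T2 (OPINDEX 1) setting its rhs2 to c, and
   finally rewrites T1 from the remaining two entries as b + a, so the
   whole chain computes ((b + a) + c) + d.  */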
5258 static tree
5259 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5260 const vec<operand_entry *> &ops, bool changed,
5261 bool next_changed)
5263 tree rhs1 = gimple_assign_rhs1 (stmt);
5264 tree rhs2 = gimple_assign_rhs2 (stmt);
5265 tree lhs = gimple_assign_lhs (stmt);
5266 operand_entry *oe;
5268 /* The final recursion case for this function is that you have
5269 exactly two operations left.
5270 If we had exactly one op in the entire list to start with, we
5271 would have never called this function, and the tail recursion
5272 rewrites them one at a time. */
5273 if (opindex + 2 == ops.length ())
5275 operand_entry *oe1, *oe2;
5277 oe1 = ops[opindex];
5278 oe2 = ops[opindex + 1];
5280 if (rhs1 != oe1->op || rhs2 != oe2->op)
5282 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5283 unsigned int uid = gimple_uid (stmt);
5285 if (dump_file && (dump_flags & TDF_DETAILS))
5287 fprintf (dump_file, "Transforming ");
5288 print_gimple_stmt (dump_file, stmt, 0);
5291 /* If the stmt that defines operand has to be inserted, insert it
5292 before the use. */
5293 if (oe1->stmt_to_insert)
5294 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5295 if (oe2->stmt_to_insert)
5296 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5297 /* Even when changed is false, reassociation could have e.g. removed
5298 some redundant operations, so unless we are just swapping the
5299 arguments or unless there is no change at all (then we just
5300 return lhs), force creation of a new SSA_NAME. */
5301 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5303 bool insert_before;
5304 gimple *insert_point
5305 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5306 lhs = make_ssa_name (TREE_TYPE (lhs));
5307 stmt
5308 = gimple_build_assign (lhs, rhs_code,
5309 oe1->op, oe2->op);
5310 gimple_set_uid (stmt, uid);
5311 gimple_set_visited (stmt, true);
5312 if (insert_before)
5313 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5314 else
5315 insert_stmt_after (stmt, insert_point);
5317 else
5319 bool insert_before;
5320 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5321 insert_before)
5322 == stmt);
5323 gimple_assign_set_rhs1 (stmt, oe1->op);
5324 gimple_assign_set_rhs2 (stmt, oe2->op);
5325 update_stmt (stmt);
5328 if (rhs1 != oe1->op && rhs1 != oe2->op)
5329 remove_visited_stmt_chain (rhs1);
5331 if (dump_file && (dump_flags & TDF_DETAILS))
5333 fprintf (dump_file, " into ");
5334 print_gimple_stmt (dump_file, stmt, 0);
5337 return lhs;
5340 /* If we hit here, we should have 3 or more ops left. */
5341 gcc_assert (opindex + 2 < ops.length ());
5343 /* Rewrite the next operator. */
5344 oe = ops[opindex];
5346 /* If the stmt that defines operand has to be inserted, insert it
5347 before the use. */
5348 if (oe->stmt_to_insert)
5349 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5351 /* Recurse on the LHS of the binary operator, which is guaranteed to
5352 be the non-leaf side. */
5353 tree new_rhs1
5354 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5355 changed || oe->op != rhs2 || next_changed,
5356 false);
5358 if (oe->op != rhs2 || new_rhs1 != rhs1)
5360 if (dump_file && (dump_flags & TDF_DETAILS))
5362 fprintf (dump_file, "Transforming ");
5363 print_gimple_stmt (dump_file, stmt, 0);
5366 /* If changed is false, this is either opindex == 0
5367 or all outer rhs2's were equal to corresponding oe->op,
5368 and powi_result is NULL.
5369 That means lhs is equivalent before and after reassociation.
5370 Otherwise ensure the old lhs SSA_NAME is not reused and
5371 create a new stmt as well, so that any debug stmts will be
5372 properly adjusted. */
5373 if (changed)
5375 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5376 unsigned int uid = gimple_uid (stmt);
5377 bool insert_before;
5378 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5379 insert_before);
5381 lhs = make_ssa_name (TREE_TYPE (lhs));
5382 stmt = gimple_build_assign (lhs, rhs_code,
5383 new_rhs1, oe->op);
5384 gimple_set_uid (stmt, uid);
5385 gimple_set_visited (stmt, true);
5386 if (insert_before)
5387 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5388 else
5389 insert_stmt_after (stmt, insert_point);
5391 else
5393 bool insert_before;
5394 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5395 insert_before)
5396 == stmt);
5397 gimple_assign_set_rhs1 (stmt, new_rhs1);
5398 gimple_assign_set_rhs2 (stmt, oe->op);
5399 update_stmt (stmt);
5402 if (dump_file && (dump_flags & TDF_DETAILS))
5404 fprintf (dump_file, " into ");
5405 print_gimple_stmt (dump_file, stmt, 0);
5408 return lhs;
5411 /* Find out how many cycles we need to compute the statements chain.
5412 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5413 maximum number of independent statements we may execute per cycle. */
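/* Worked example (numbers made up): with OPS_NUM = 10 and
   CPU_WIDTH = 2, the first phase takes 10 / (2 * 2) = 2 cycles,
   leaving 10 - 2 * 2 = 6 operands; 6 is not a power of two, so the
   second phase needs floor_log2 (6) + 1 = 3 more cycles, for a total
   of 5.  */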
5415 static int
5416 get_required_cycles (int ops_num, int cpu_width)
5418 int res;
5419 int elog;
5420 unsigned int rest;
5422 /* While we have more than 2 * cpu_width operands
5423 we may reduce number of operands by cpu_width
5424 per cycle. */
5425 res = ops_num / (2 * cpu_width);
5427 /* The remaining operand count may be halved each cycle
5428 until we have only one operand. */
5429 rest = (unsigned)(ops_num - res * cpu_width);
5430 elog = exact_log2 (rest);
5431 if (elog >= 0)
5432 res += elog;
5433 else
5434 res += floor_log2 (rest) + 1;
5436 return res;
5439 /* Given that the target fully pipelines FMA instructions, return the latency
5440 of MULT_EXPRs that can't be hidden by the FMAs. WIDTH is the number of
5441 pipes. */
5443 static inline int
5444 get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
5446 gcc_checking_assert (mult_num && mult_num <= ops_num);
5448 /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
5449 e.g:
5451 A * B + C * D
5453 _1 = A * B;
5454 _2 = .FMA (C, D, _1);
5456 Otherwise there's latency(MULT)*1 in the first FMA. */
5457 return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
5460 /* Returns an optimal number of registers to use for computation of
5461 the given statements.
5463 LHS is the result ssa name of OPS. MULT_NUM is the number of sub-expressions
5464 that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs. */
5466 static int
5467 get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
5468 enum tree_code opc, machine_mode mode)
5470 int param_width = param_tree_reassoc_width;
5471 int width;
5472 int width_min;
5473 int cycles_best;
5474 int ops_num = ops->length ();
5476 if (param_width > 0)
5477 width = param_width;
5478 else
5479 width = targetm.sched.reassociation_width (opc, mode);
5481 if (width == 1)
5482 return width;
5484 /* Get the minimal time required for sequence computation. */
5485 cycles_best = get_required_cycles (ops_num, width);
5487 /* Check if we may use less width and still compute the sequence in
5488 the same time. It will allow us to reduce register usage.
5489 get_required_cycles is monotonically increasing with lower width
5490 so we can perform a binary search for the minimal width that still
5491 results in the optimal cycle count. */
5492 width_min = 1;
5494 /* If the target fully pipelines FMA instructions, the multiply part can start
5495 as soon as its operands are ready. Assuming symmetric pipes are used for
5496 FMUL/FADD/FMA, then for a sequence of FMA like:
5498 _8 = .FMA (_2, _3, _1);
5499 _9 = .FMA (_5, _4, _8);
5500 _10 = .FMA (_7, _6, _9);
5502 , if width=1, the latency is latency(MULT) + latency(ADD)*3.
5503 While with width=2:
5505 _8 = _4 * _5;
5506 _9 = .FMA (_2, _3, _1);
5507 _10 = .FMA (_6, _7, _8);
5508 _11 = _9 + _10;
5510 , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >=
5511 latency(ADD), the first variant is preferred.
5513 Find out if we can get a smaller width considering FMA.
5514 Assume FMUL and FMA use the same units that can also do FADD.
5515 For other scenarios, such as when FMUL and FADD are using separated units,
5516 the following code may not apply. */
5518 int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
5519 if (width > 1 && mult_num && param_fully_pipelined_fma
5520 && width_mult <= width)
5522 /* Latency of MULT_EXPRs. */
5523 int lat_mul
5524 = get_mult_latency_consider_fma (ops_num, mult_num, width_mult);
5526 /* Quick search might not apply. So start from 1. */
5527 for (int i = 1; i < width_mult; i++)
5529 int lat_mul_new
5530 = get_mult_latency_consider_fma (ops_num, mult_num, i);
5531 int lat_add_new = get_required_cycles (ops_num, i);
5533 /* Assume latency(MULT) >= latency(ADD). */
5534 if (lat_mul - lat_mul_new >= lat_add_new - cycles_best)
5536 width = i;
5537 break;
5541 else
5543 while (width > width_min)
5545 int width_mid = (width + width_min) / 2;
5547 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5548 width = width_mid;
5549 else if (width_min < width_mid)
5550 width_min = width_mid;
5551 else
5552 break;
5556 /* If there's a loop-dependent FMA result, return width=2 to avoid it. This is
5557 better than skipping these FMA candidates in widening_mul. */
5558 if (width == 1
5559 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
5560 param_avoid_fma_max_bits))
5562 /* Look for cross backedge dependency:
5563 1. LHS is a phi argument in the same basic block it is defined.
5564 2. And the result of the phi node is used in OPS. */
5565 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));
5567 use_operand_p use_p;
5568 imm_use_iterator iter;
5569 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5570 if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p)))
5572 if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
5573 != bb)
5574 continue;
5575 tree phi_result = gimple_phi_result (phi);
5576 operand_entry *oe;
5577 unsigned int j;
5578 FOR_EACH_VEC_ELT (*ops, j, oe)
5580 if (TREE_CODE (oe->op) != SSA_NAME)
5581 continue;
5583 /* Result of phi is operand of PLUS_EXPR. */
5584 if (oe->op == phi_result)
5585 return 2;
5587 /* Check if the result of the phi is an operand of a MULT_EXPR. */
5588 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5589 if (is_gimple_assign (def_stmt)
5590 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
5592 tree rhs = gimple_assign_rhs1 (def_stmt);
5593 if (TREE_CODE (rhs) == SSA_NAME)
5595 if (rhs == phi_result)
5596 return 2;
5597 def_stmt = SSA_NAME_DEF_STMT (rhs);
5600 if (is_gimple_assign (def_stmt)
5601 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
5603 if (gimple_assign_rhs1 (def_stmt) == phi_result
5604 || gimple_assign_rhs2 (def_stmt) == phi_result)
5605 return 2;
5611 return width;
5614 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5615 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5616 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5618 /* Rewrite statements with a dependency chain with regard to the chance to
5619 generate FMA.
5620 For a chain with FMA: try to keep the FMA opportunity as much as possible.
5621 For a chain without FMA: put the computation in rank order and try
5622 to allow operations to be executed in parallel.
5623 E.g.
5624 e + f + a * b + c * d;
5626 ssa1 = e + a * b;
5627 ssa2 = f + c * d;
5628 ssa3 = ssa1 + ssa2;
5630 This reassociation approach preserves the chance of fma generation as much
5631 as possible.
5633 Another thing is to avoid adding loop-carried ops to long chains, otherwise
5634 the whole chain will have dependencies across the loop iteration. Just keep
5635 loop-carried ops in a separate chain.
5636 E.g.
5637 x_1 = phi (x_0, x_2)
5638 y_1 = phi (y_0, y_2)
5640 a + b + c + d + e + x1 + y1
5642 SSA1 = a + b;
5643 SSA2 = c + d;
5644 SSA3 = SSA1 + e;
5645 SSA4 = SSA3 + SSA2;
5646 SSA5 = x1 + y1;
5647 SSA6 = SSA4 + SSA5;
5649 static void
5650 rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5651 const vec<operand_entry *> &ops)
5653 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5654 int op_num = ops.length ();
5655 int op_normal_num = op_num;
5656 gcc_assert (op_num > 0);
5657 int stmt_num = op_num - 1;
5658 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5659 int i = 0, j = 0;
5660 tree tmp_op[2], op1;
5661 operand_entry *oe;
5662 gimple *stmt1 = NULL;
5663 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5664 int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5665 int width_active = 0, width_count = 0;
5666 bool has_biased = false, ops_changed = false;
5667 auto_vec<operand_entry *> ops_normal;
5668 auto_vec<operand_entry *> ops_biased;
5669 vec<operand_entry *> *ops1;
5671 /* We start expression rewriting from the top statements.
5672 So, in this loop we create a full list of statements
5673 we will work with. */
5674 stmts[stmt_num - 1] = stmt;
5675 for (i = stmt_num - 2; i >= 0; i--)
5676 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5678 /* Avoid adding loop-carried ops to long chains; first filter out the
5679 loop-carried ones. But we need to make sure that the length of the
5680 remainder is not less than 4, which is the smallest ops length at which
5681 we can break the dependency. */
5682 FOR_EACH_VEC_ELT (ops, i, oe)
5684 if (TREE_CODE (oe->op) == SSA_NAME
5685 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5686 && op_normal_num > 4)
5688 ops_biased.safe_push (oe);
5689 has_biased = true;
5690 op_normal_num --;
5692 else
5693 ops_normal.safe_push (oe);
5696 /* Width should not be larger than ops length / 2, since we cannot create
5697 more parallel dependency chains than that. */
5698 int width_normal = op_normal_num / 2;
5699 int width_biased = (op_num - op_normal_num) / 2;
5700 width_normal = width <= width_normal ? width : width_normal;
5701 width_biased = width <= width_biased ? width : width_biased;
5703 ops1 = &ops_normal;
5704 width_count = width_active = width_normal;
5706 /* Build parallel dependency chain according to width. */
5707 for (i = 0; i < stmt_num; i++)
5709 if (dump_file && (dump_flags & TDF_DETAILS))
5711 fprintf (dump_file, "Transforming ");
5712 print_gimple_stmt (dump_file, stmts[i], 0);
5715 /* When the work of normal ops is over, but the loop is not over,
5716 continue to do biased ops. */
5717 if (width_count == 0 && ops1 == &ops_normal)
5719 ops1 = &ops_biased;
5720 width_count = width_active = width_biased;
5721 ops_changed = true;
5724 /* Swap the operands if no FMA in the chain. */
5725 if (ops1->length () > 2 && !has_fma)
5726 swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5728 if (i < width_active
5729 || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5731 for (j = 0; j < 2; j++)
5733 oe = ops1->pop ();
5734 tmp_op[j] = oe->op;
5735 /* If the stmt that defines operand has to be inserted, insert it
5736 before the use. */
5737 stmt1 = oe->stmt_to_insert;
5738 if (stmt1)
5739 insert_stmt_before_use (stmts[i], stmt1);
5740 stmt1 = NULL;
5742 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5743 tmp_op[1],
5744 tmp_op[0],
5745 opcode);
5746 gimple_set_visited (stmts[i], true);
5749 else
5751 /* We keep original statement only for the last one. All others are
5752 recreated. */
5753 if (!ops1->length ())
5755 /* For biased length equal to 2. */
5756 if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5757 last_rhs2_stmt_index = i - 1;
5759 /* When width_count == 2 and there is no biased, just finish. */
5760 if (width_count == NORMAL_END_STMT && !has_biased)
5762 last_rhs1_stmt_index = i - 1;
5763 last_rhs2_stmt_index = i - 2;
5765 if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5767 /* We keep original statement only for the last one. All
5768 others are recreated. */
5769 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5770 (stmts[last_rhs1_stmt_index]));
5771 gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5772 (stmts[last_rhs2_stmt_index]));
5773 update_stmt (stmts[i]);
5775 else
5777 stmts[i] =
5778 build_and_add_sum (TREE_TYPE (last_rhs1),
5779 gimple_assign_lhs (stmts[i-width_count]),
5780 gimple_assign_lhs
5781 (stmts[i-width_count+1]),
5782 opcode);
5783 gimple_set_visited (stmts[i], true);
5784 width_count--;
5786 /* It is the end of the normal or biased ops.
5787 last_rhs1_stmt_index is used to record the last stmt index
5788 for normal ops. last_rhs2_stmt_index is used to record the
5789 last stmt index for biased ops. */
5790 if (width_count == BIASED_END_STMT)
5792 gcc_assert (has_biased);
5793 if (ops_biased.length ())
5794 last_rhs1_stmt_index = i;
5795 else
5796 last_rhs2_stmt_index = i;
5797 width_count--;
5801 else
5803 /* Attach the remaining ops to the parallel dependency chain. */
5804 oe = ops1->pop ();
5805 op1 = oe->op;
5806 stmt1 = oe->stmt_to_insert;
5807 if (stmt1)
5808 insert_stmt_before_use (stmts[i], stmt1);
5809 stmt1 = NULL;
5811 /* For only one biased op. */
5812 if (width_count == SPECIAL_BIASED_END_STMT)
5814 /* We keep original statement only for the last one. All
5815 others are recreated. */
5816 gcc_assert (has_biased);
5817 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5818 (stmts[last_rhs1_stmt_index]));
5819 gimple_assign_set_rhs2 (stmts[i], op1);
5820 update_stmt (stmts[i]);
5822 else
5824 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5825 gimple_assign_lhs
5826 (stmts[i-width_active]),
5827 op1,
5828 opcode);
5829 gimple_set_visited (stmts[i], true);
5834 if (dump_file && (dump_flags & TDF_DETAILS))
5836 fprintf (dump_file, " into ");
5837 print_gimple_stmt (dump_file, stmts[i], 0);
5841 remove_visited_stmt_chain (last_rhs1);
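/* An informal sketch of the effect (operand names and the exact pairing are
   illustrative; they depend on the operand ranks): for a chain
     s = a + b + c + d + e + f
   rewritten with width 2 and no biased ops, the statements become roughly
     t1 = a + b;  t2 = c + d;  t3 = t1 + e;  t4 = t2 + f;  s = t3 + t4;
   i.e. two dependency chains that can execute in parallel, joined at the
   end by the original statement.  */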
5844 /* Transform STMT, which is really (A + B) + (C + D), into the left
5845 linear form, ((A+B)+C)+D.
5846 Recurse on D if necessary. */
5848 static void
5849 linearize_expr (gimple *stmt)
5851 gimple_stmt_iterator gsi;
5852 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5853 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5854 gimple *oldbinrhs = binrhs;
5855 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5856 gimple *newbinrhs = NULL;
5857 class loop *loop = loop_containing_stmt (stmt);
5858 tree lhs = gimple_assign_lhs (stmt);
5860 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5861 && is_reassociable_op (binrhs, rhscode, loop));
5863 gsi = gsi_for_stmt (stmt);
5865 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5866 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5867 gimple_assign_rhs_code (binrhs),
5868 gimple_assign_lhs (binlhs),
5869 gimple_assign_rhs2 (binrhs));
5870 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5871 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5872 gimple_set_uid (binrhs, gimple_uid (stmt));
5874 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5875 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5877 if (dump_file && (dump_flags & TDF_DETAILS))
5879 fprintf (dump_file, "Linearized: ");
5880 print_gimple_stmt (dump_file, stmt, 0);
5883 reassociate_stats.linearized++;
5884 update_stmt (stmt);
5886 gsi = gsi_for_stmt (oldbinrhs);
5887 reassoc_remove_stmt (&gsi);
5888 release_defs (oldbinrhs);
5890 gimple_set_visited (stmt, true);
5891 gimple_set_visited (binlhs, true);
5892 gimple_set_visited (binrhs, true);
5894 /* Tail recurse on the new rhs if it still needs reassociation. */
5895 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5896 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5897 want to change the algorithm while converting to tuples. */
5898 linearize_expr (stmt);
5901 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5902 it. Otherwise, return NULL. */
5904 static gimple *
5905 get_single_immediate_use (tree lhs)
5907 use_operand_p immuse;
5908 gimple *immusestmt;
5910 if (TREE_CODE (lhs) == SSA_NAME
5911 && single_imm_use (lhs, &immuse, &immusestmt)
5912 && is_gimple_assign (immusestmt))
5913 return immusestmt;
5915 return NULL;
5918 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5919 representing the negated value. Insertions of any necessary
5920 instructions go before GSI.
5921 This function is recursive in that, if you hand it "a_5" as the
5922 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5923 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5925 static tree
5926 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5928 gimple *negatedefstmt = NULL;
5929 tree resultofnegate;
5930 gimple_stmt_iterator gsi;
5931 unsigned int uid;
5933 /* If we are trying to negate a name, defined by an add, negate the
5934 add operands instead. */
5935 if (TREE_CODE (tonegate) == SSA_NAME)
5936 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5937 if (TREE_CODE (tonegate) == SSA_NAME
5938 && is_gimple_assign (negatedefstmt)
5939 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5940 && has_single_use (gimple_assign_lhs (negatedefstmt))
5941 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5943 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5944 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5945 tree lhs = gimple_assign_lhs (negatedefstmt);
5946 gimple *g;
5948 gsi = gsi_for_stmt (negatedefstmt);
5949 rhs1 = negate_value (rhs1, &gsi);
5951 gsi = gsi_for_stmt (negatedefstmt);
5952 rhs2 = negate_value (rhs2, &gsi);
5954 gsi = gsi_for_stmt (negatedefstmt);
5955 lhs = make_ssa_name (TREE_TYPE (lhs));
5956 gimple_set_visited (negatedefstmt, true);
5957 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5958 gimple_set_uid (g, gimple_uid (negatedefstmt));
5959 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5960 return lhs;
5963 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5964 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5965 NULL_TREE, true, GSI_SAME_STMT);
5966 gsi = *gsip;
5967 uid = gimple_uid (gsi_stmt (gsi));
5968 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5970 gimple *stmt = gsi_stmt (gsi);
5971 if (gimple_uid (stmt) != 0)
5972 break;
5973 gimple_set_uid (stmt, uid);
5975 return resultofnegate;
5978 /* Return true if we should break up the subtract in STMT into an add
5979 with negate. This is true when the subtract operands are really
5980 adds, or the subtract itself is used in an add expression. In
5981 either case, breaking up the subtract into an add with negate
5982 exposes the adds to reassociation. */
5984 static bool
5985 should_break_up_subtract (gimple *stmt)
5987 tree lhs = gimple_assign_lhs (stmt);
5988 tree binlhs = gimple_assign_rhs1 (stmt);
5989 tree binrhs = gimple_assign_rhs2 (stmt);
5990 gimple *immusestmt;
5991 class loop *loop = loop_containing_stmt (stmt);
5993 if (TREE_CODE (binlhs) == SSA_NAME
5994 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5995 return true;
5997 if (TREE_CODE (binrhs) == SSA_NAME
5998 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5999 return true;
6001 if (TREE_CODE (lhs) == SSA_NAME
6002 && (immusestmt = get_single_immediate_use (lhs))
6003 && is_gimple_assign (immusestmt)
6004 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
6005 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
6006 && gimple_assign_rhs1 (immusestmt) == lhs)
6007 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
6008 return true;
6009 return false;
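/* A small example of the predicate above and the transform below
   (hypothetical SSA names): for
     t_1 = a_2 - b_3;
     u_4 = t_1 + c_5;
   the subtract feeds an addition, so it is broken up into
     n_6 = -b_3;
     t_1 = a_2 + n_6;
   exposing the whole computation as a single addition chain.  */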
6012 /* Transform STMT from A - B into A + -B. */
6014 static void
6015 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
6017 tree rhs1 = gimple_assign_rhs1 (stmt);
6018 tree rhs2 = gimple_assign_rhs2 (stmt);
6020 if (dump_file && (dump_flags & TDF_DETAILS))
6022 fprintf (dump_file, "Breaking up subtract ");
6023 print_gimple_stmt (dump_file, stmt, 0);
6026 rhs2 = negate_value (rhs2, gsip);
6027 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
6028 update_stmt (stmt);
6031 /* Determine whether STMT is a builtin call that raises an SSA name
6032 to an integer power and has only one use. If so, and this is early
6033 reassociation and unsafe math optimizations are permitted, place
6034 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
6035 If any of these conditions does not hold, return FALSE. */
6037 static bool
6038 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
6040 tree arg1;
6041 REAL_VALUE_TYPE c, cint;
6043 switch (gimple_call_combined_fn (stmt))
6045 CASE_CFN_POW:
6046 if (flag_errno_math)
6047 return false;
6049 *base = gimple_call_arg (stmt, 0);
6050 arg1 = gimple_call_arg (stmt, 1);
6052 if (TREE_CODE (arg1) != REAL_CST)
6053 return false;
6055 c = TREE_REAL_CST (arg1);
6057 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
6058 return false;
6060 *exponent = real_to_integer (&c);
6061 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
6062 if (!real_identical (&c, &cint))
6063 return false;
6065 break;
6067 CASE_CFN_POWI:
6068 *base = gimple_call_arg (stmt, 0);
6069 arg1 = gimple_call_arg (stmt, 1);
6071 if (!tree_fits_shwi_p (arg1))
6072 return false;
6074 *exponent = tree_to_shwi (arg1);
6075 break;
6077 default:
6078 return false;
6081 /* Expanding negative exponents is generally unproductive, so we don't
6082 complicate matters with those. Exponents of zero and one should
6083 have been handled by expression folding. */
6084 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
6085 return false;
6087 return true;
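/* As an informal example (assuming the errno/unsafe-math checks above pass):
   pow (x_1, 3.0) yields *BASE = x_1 and *EXPONENT = 3, whereas
   pow (x_1, 2.5) is rejected because the exponent is not an exact integer,
   and powi (x_1, -2) is rejected because negative exponents are not
   expanded here.  */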
6090 /* Try to derive and add operand entry for OP to *OPS. Return false if
6091 unsuccessful. */
6093 static bool
6094 try_special_add_to_ops (vec<operand_entry *> *ops,
6095 enum tree_code code,
6096 tree op, gimple* def_stmt)
6098 tree base = NULL_TREE;
6099 HOST_WIDE_INT exponent = 0;
6101 if (TREE_CODE (op) != SSA_NAME
6102 || ! has_single_use (op))
6103 return false;
6105 if (code == MULT_EXPR
6106 && reassoc_insert_powi_p
6107 && flag_unsafe_math_optimizations
6108 && is_gimple_call (def_stmt)
6109 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
6111 add_repeat_to_ops_vec (ops, base, exponent);
6112 gimple_set_visited (def_stmt, true);
6113 return true;
6115 else if (code == MULT_EXPR
6116 && is_gimple_assign (def_stmt)
6117 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6118 && !HONOR_SNANS (TREE_TYPE (op))
6119 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
6120 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
6121 && (!FLOAT_TYPE_P (TREE_TYPE (op))
6122 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
6124 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6125 tree cst = build_minus_one_cst (TREE_TYPE (op));
6126 add_to_ops_vec (ops, rhs1);
6127 add_to_ops_vec (ops, cst);
6128 gimple_set_visited (def_stmt, true);
6129 return true;
6132 return false;
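/* A sketch of the two cases handled above (hypothetical, single-use SSA
   names): for
     t_1 = __builtin_pow (x_2, 3.0);  y_3 = t_1 * z_4;
   the operand t_1 is recorded as a repeated factor x_2 with count 3 (when
   the powi conditions above hold); for
     t_1 = -x_2;  y_3 = t_1 * z_4;
   the operand t_1 is replaced by the two operands x_2 and -1.  */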
6135 /* Recursively linearize a binary expression that is the RHS of STMT.
6136 Place the operands of the expression tree in the vector named OPS. */
6138 static void
6139 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
6140 bool is_associative, bool set_visited)
6142 tree binlhs = gimple_assign_rhs1 (stmt);
6143 tree binrhs = gimple_assign_rhs2 (stmt);
6144 gimple *binlhsdef = NULL, *binrhsdef = NULL;
6145 bool binlhsisreassoc = false;
6146 bool binrhsisreassoc = false;
6147 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
6148 class loop *loop = loop_containing_stmt (stmt);
6150 if (set_visited)
6151 gimple_set_visited (stmt, true);
6153 if (TREE_CODE (binlhs) == SSA_NAME)
6155 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
6156 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
6157 && !stmt_could_throw_p (cfun, binlhsdef));
6160 if (TREE_CODE (binrhs) == SSA_NAME)
6162 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
6163 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
6164 && !stmt_could_throw_p (cfun, binrhsdef));
6167 /* If the LHS is not reassociable, but the RHS is, we need to swap
6168 them. If neither is reassociable, there is nothing we can do, so
6169 just put them in the ops vector. If the LHS is reassociable,
6170 linearize it. If both are reassociable, then linearize the RHS
6171 and the LHS. */
6173 if (!binlhsisreassoc)
6175 /* If this is not an associative operation, such as division, give up. */
6176 if (!is_associative)
6178 add_to_ops_vec (ops, binrhs);
6179 return;
6182 if (!binrhsisreassoc)
6184 bool swap = false;
6185 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6186 /* If we add ops for the rhs we expect to be able to recurse
6187 to it via the lhs during expression rewrite so swap
6188 operands. */
6189 swap = true;
6190 else
6191 add_to_ops_vec (ops, binrhs);
6193 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
6194 add_to_ops_vec (ops, binlhs);
6196 if (!swap)
6197 return;
6200 if (dump_file && (dump_flags & TDF_DETAILS))
6202 fprintf (dump_file, "swapping operands of ");
6203 print_gimple_stmt (dump_file, stmt, 0);
6206 swap_ssa_operands (stmt,
6207 gimple_assign_rhs1_ptr (stmt),
6208 gimple_assign_rhs2_ptr (stmt));
6209 update_stmt (stmt);
6211 if (dump_file && (dump_flags & TDF_DETAILS))
6213 fprintf (dump_file, " is now ");
6214 print_gimple_stmt (dump_file, stmt, 0);
6216 if (!binrhsisreassoc)
6217 return;
6219 /* We want to make it so the lhs is always the reassociative op,
6220 so swap. */
6221 std::swap (binlhs, binrhs);
6223 else if (binrhsisreassoc)
6225 linearize_expr (stmt);
6226 binlhs = gimple_assign_rhs1 (stmt);
6227 binrhs = gimple_assign_rhs2 (stmt);
6230 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6231 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6232 rhscode, loop));
6233 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6234 is_associative, set_visited);
6236 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6237 add_to_ops_vec (ops, binrhs);
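/* A small illustration (hypothetical SSA names): starting from
     t_1 = a_2 + b_3;  t_4 = c_5 + d_6;  s_7 = t_1 + t_4;
   the code above first re-linearizes the tree into a left-linear chain and
   then fills OPS with the four leaf operands a_2, b_3, c_5 and d_6 (in some
   order), ready for sorting and rewriting.  */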
6240 /* Repropagate the negates back into subtracts, since no other pass
6241 currently does it. */
6243 static void
6244 repropagate_negates (void)
6246 unsigned int i = 0;
6247 tree negate;
6249 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6251 gimple *user = get_single_immediate_use (negate);
6252 if (!user || !is_gimple_assign (user))
6253 continue;
6255 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6256 if (TREE_CODE (negateop) == SSA_NAME
6257 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6258 continue;
6260 /* The negate operand can be either operand of a PLUS_EXPR
6261 (it can be the LHS if the RHS is a constant for example).
6263 Force the negate operand to the RHS of the PLUS_EXPR, then
6264 transform the PLUS_EXPR into a MINUS_EXPR. */
6265 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6267 /* If the negated operand appears on the LHS of the
6268 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6269 to force the negated operand to the RHS of the PLUS_EXPR. */
6270 if (gimple_assign_rhs1 (user) == negate)
6272 swap_ssa_operands (user,
6273 gimple_assign_rhs1_ptr (user),
6274 gimple_assign_rhs2_ptr (user));
6277 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6278 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6279 if (gimple_assign_rhs2 (user) == negate)
6281 tree rhs1 = gimple_assign_rhs1 (user);
6282 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6283 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6284 negateop);
6285 update_stmt (user);
6288 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6290 if (gimple_assign_rhs1 (user) == negate)
6292 /* We have
6293 x = -negateop
6294 y = x - b
6295 which we transform into
6296 x = negateop + b
6297 y = -x .
6298 This pushes down the negate which we possibly can merge
6299 into some other operation, hence insert it into the
6300 plus_negates vector. */
6301 gimple *feed = SSA_NAME_DEF_STMT (negate);
6302 tree b = gimple_assign_rhs2 (user);
6303 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6304 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6305 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6306 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6307 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6308 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6309 user = gsi_stmt (gsi2);
6310 update_stmt (user);
6311 reassoc_remove_stmt (&gsi);
6312 release_defs (feed);
6313 plus_negates.safe_push (gimple_assign_lhs (user));
6315 else
6317 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6318 getting rid of one operation. */
6319 tree rhs1 = gimple_assign_rhs1 (user);
6320 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6321 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6322 update_stmt (gsi_stmt (gsi));
6328 /* Break up subtract operations in block BB.
6330 We do this top down because we don't know whether the subtract is
6331 part of a possible chain of reassociation except at the top.
6333 IE given
6334 d = f + g
6335 c = a + e
6336 b = c - d
6337 q = b - r
6338 k = t - q
6340 we want to break up k = t - q, but we won't until we've transformed q
6341 = b - r, which won't be broken up until we transform b = c - d.
6343 En passant, clear the GIMPLE visited flag on every statement
6344 and set UIDs within each basic block. */
6346 static void
6347 break_up_subtract_bb (basic_block bb)
6349 gimple_stmt_iterator gsi;
6350 unsigned int uid = 1;
6352 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6354 gimple *stmt = gsi_stmt (gsi);
6355 gimple_set_visited (stmt, false);
6356 gimple_set_uid (stmt, uid++);
6358 if (!is_gimple_assign (stmt)
6359 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6360 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6361 continue;
6363 /* Look for simple gimple subtract operations. */
6364 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6366 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6367 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6368 continue;
6370 /* Check for a subtract used only in an addition. If this
6371 is the case, transform it into add of a negate for better
6372 reassociation. IE transform C = A-B into C = A + -B if C
6373 is only used in an addition. */
6374 if (should_break_up_subtract (stmt))
6375 break_up_subtract (stmt, &gsi);
6377 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6378 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6379 plus_negates.safe_push (gimple_assign_lhs (stmt));
6383 /* Used for repeated factor analysis. */
6384 struct repeat_factor
6386 /* An SSA name that occurs in a multiply chain. */
6387 tree factor;
6389 /* Cached rank of the factor. */
6390 unsigned rank;
6392 /* Number of occurrences of the factor in the chain. */
6393 HOST_WIDE_INT count;
6395 /* An SSA name representing the product of this factor and
6396 all factors appearing later in the repeated factor vector. */
6397 tree repr;
6401 static vec<repeat_factor> repeat_factor_vec;
6403 /* Used for sorting the repeat factor vector. Sort primarily by
6404 ascending occurrence count, secondarily by descending rank. */
6406 static int
6407 compare_repeat_factors (const void *x1, const void *x2)
6409 const repeat_factor *rf1 = (const repeat_factor *) x1;
6410 const repeat_factor *rf2 = (const repeat_factor *) x2;
6412 if (rf1->count < rf2->count)
6413 return -1;
6414 else if (rf1->count > rf2->count)
6415 return 1;
6417 if (rf1->rank < rf2->rank)
6418 return 1;
6419 else if (rf1->rank > rf2->rank)
6420 return -1;
6422 return 0;
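/* For instance, given the entries {x_1: count 3, rank 4} and
   {y_2: count 2, rank 9} (hypothetical values), y_2 sorts before x_1
   because its count is lower; for equal counts the higher-ranked entry
   sorts first.  */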
6425 /* Look for repeated operands in OPS in the multiply tree rooted at
6426 STMT. Replace them with an optimal sequence of multiplies and powi
6427 builtin calls, and remove the used operands from OPS. Return an
6428 SSA name representing the value of the replacement sequence. */
6430 static tree
6431 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6433 unsigned i, j, vec_len;
6434 int ii;
6435 operand_entry *oe;
6436 repeat_factor *rf1, *rf2;
6437 repeat_factor rfnew;
6438 tree result = NULL_TREE;
6439 tree target_ssa, iter_result;
6440 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6441 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6442 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6443 gimple *mul_stmt, *pow_stmt;
6445 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6446 target, unless type is integral. */
6447 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6448 return NULL_TREE;
6450 /* Allocate the repeated factor vector. */
6451 repeat_factor_vec.create (10);
6453 /* Scan the OPS vector for all SSA names in the product and build
6454 up a vector of occurrence counts for each factor. */
6455 FOR_EACH_VEC_ELT (*ops, i, oe)
6457 if (TREE_CODE (oe->op) == SSA_NAME)
6459 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6461 if (rf1->factor == oe->op)
6463 rf1->count += oe->count;
6464 break;
6468 if (j >= repeat_factor_vec.length ())
6470 rfnew.factor = oe->op;
6471 rfnew.rank = oe->rank;
6472 rfnew.count = oe->count;
6473 rfnew.repr = NULL_TREE;
6474 repeat_factor_vec.safe_push (rfnew);
6479 /* Sort the repeated factor vector by (a) increasing occurrence count,
6480 and (b) decreasing rank. */
6481 repeat_factor_vec.qsort (compare_repeat_factors);
6483 /* It is generally best to combine as many base factors as possible
6484 into a product before applying __builtin_powi to the result.
6485 However, the sort order chosen for the repeated factor vector
6486 allows us to cache partial results for the product of the base
6487 factors for subsequent use. When we already have a cached partial
6488 result from a previous iteration, it is best to make use of it
6489 before looking for another __builtin_pow opportunity.
6491 As an example, consider x * x * y * y * y * z * z * z * z.
6492 We want to first compose the product x * y * z, raise it to the
6493 second power, then multiply this by y * z, and finally multiply
6494 by z. This can be done in 5 multiplies provided we cache y * z
6495 for use in both expressions:
6497 t1 = y * z
6498 t2 = t1 * x
6499 t3 = t2 * t2
6500 t4 = t1 * t3
6501 result = t4 * z
6503 If we instead ignored the cached y * z and first multiplied by
6504 the __builtin_pow opportunity z * z, we would get the inferior:
6506 t1 = y * z
6507 t2 = t1 * x
6508 t3 = t2 * t2
6509 t4 = z * z
6510 t5 = t3 * t4
6511 result = t5 * y */
6513 vec_len = repeat_factor_vec.length ();
6515 /* Repeatedly look for opportunities to create a builtin_powi call. */
6516 while (true)
6518 HOST_WIDE_INT power;
6520 /* First look for the largest cached product of factors from
6521 preceding iterations. If found, create a builtin_powi for
6522 it if the minimum occurrence count for its factors is at
6523 least 2, or just use this cached product as our next
6524 multiplicand if the minimum occurrence count is 1. */
6525 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6527 if (rf1->repr && rf1->count > 0)
6528 break;
6531 if (j < vec_len)
6533 power = rf1->count;
6535 if (power == 1)
6537 iter_result = rf1->repr;
6539 if (dump_file && (dump_flags & TDF_DETAILS))
6541 unsigned elt;
6542 repeat_factor *rf;
6543 fputs ("Multiplying by cached product ", dump_file);
6544 for (elt = j; elt < vec_len; elt++)
6546 rf = &repeat_factor_vec[elt];
6547 print_generic_expr (dump_file, rf->factor);
6548 if (elt < vec_len - 1)
6549 fputs (" * ", dump_file);
6551 fputs ("\n", dump_file);
6554 else
6556 if (INTEGRAL_TYPE_P (type))
6558 gcc_assert (power > 1);
6559 gimple_stmt_iterator gsip = gsi;
6560 gsi_prev (&gsip);
6561 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6562 rf1->repr, power);
6563 gimple_stmt_iterator gsic = gsi;
6564 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6566 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6567 gimple_set_visited (gsi_stmt (gsic), true);
6568 gsi_prev (&gsic);
6571 else
6573 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6574 pow_stmt
6575 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6576 build_int_cst (integer_type_node,
6577 power));
6578 gimple_call_set_lhs (pow_stmt, iter_result);
6579 gimple_set_location (pow_stmt, gimple_location (stmt));
6580 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6581 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6584 if (dump_file && (dump_flags & TDF_DETAILS))
6586 unsigned elt;
6587 repeat_factor *rf;
6588 fputs ("Building __builtin_pow call for cached product (",
6589 dump_file);
6590 for (elt = j; elt < vec_len; elt++)
6592 rf = &repeat_factor_vec[elt];
6593 print_generic_expr (dump_file, rf->factor);
6594 if (elt < vec_len - 1)
6595 fputs (" * ", dump_file);
6597 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6598 power);
6602 else
6604 /* Otherwise, find the first factor in the repeated factor
6605 vector whose occurrence count is at least 2. If no such
6606 factor exists, there are no builtin_powi opportunities
6607 remaining. */
6608 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6610 if (rf1->count >= 2)
6611 break;
6614 if (j >= vec_len)
6615 break;
6617 power = rf1->count;
6619 if (dump_file && (dump_flags & TDF_DETAILS))
6621 unsigned elt;
6622 repeat_factor *rf;
6623 fputs ("Building __builtin_pow call for (", dump_file);
6624 for (elt = j; elt < vec_len; elt++)
6626 rf = &repeat_factor_vec[elt];
6627 print_generic_expr (dump_file, rf->factor);
6628 if (elt < vec_len - 1)
6629 fputs (" * ", dump_file);
6631 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6634 reassociate_stats.pows_created++;
6636 /* Visit each element of the vector in reverse order (so that
6637 high-occurrence elements are visited first, and within the
6638 same occurrence count, lower-ranked elements are visited
6639 first). Form a linear product of all elements in this order
6640 whose occurrence count is at least that of element J.
6641 Record the SSA name representing the product of each element
6642 with all subsequent elements in the vector. */
6643 if (j == vec_len - 1)
6644 rf1->repr = rf1->factor;
6645 else
6647 for (ii = vec_len - 2; ii >= (int)j; ii--)
6649 tree op1, op2;
6651 rf1 = &repeat_factor_vec[ii];
6652 rf2 = &repeat_factor_vec[ii + 1];
6654 /* Init the last factor's representative to be itself. */
6655 if (!rf2->repr)
6656 rf2->repr = rf2->factor;
6658 op1 = rf1->factor;
6659 op2 = rf2->repr;
6661 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6662 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6663 op1, op2);
6664 gimple_set_location (mul_stmt, gimple_location (stmt));
6665 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6666 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6667 rf1->repr = target_ssa;
6669 /* Don't reprocess the multiply we just introduced. */
6670 gimple_set_visited (mul_stmt, true);
6674 /* Form a call to __builtin_powi for the maximum product
6675 just formed, raised to the power obtained earlier. */
6676 rf1 = &repeat_factor_vec[j];
6677 if (INTEGRAL_TYPE_P (type))
6679 gcc_assert (power > 1);
6680 gimple_stmt_iterator gsip = gsi;
6681 gsi_prev (&gsip);
6682 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6683 rf1->repr, power);
6684 gimple_stmt_iterator gsic = gsi;
6685 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6687 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6688 gimple_set_visited (gsi_stmt (gsic), true);
6689 gsi_prev (&gsic);
6692 else
6694 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6695 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6696 build_int_cst (integer_type_node,
6697 power));
6698 gimple_call_set_lhs (pow_stmt, iter_result);
6699 gimple_set_location (pow_stmt, gimple_location (stmt));
6700 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6701 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6705 /* If we previously formed at least one other builtin_powi call,
6706 form the product of this one and those others. */
6707 if (result)
6709 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6710 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6711 result, iter_result);
6712 gimple_set_location (mul_stmt, gimple_location (stmt));
6713 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6714 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6715 gimple_set_visited (mul_stmt, true);
6716 result = new_result;
6718 else
6719 result = iter_result;
6721 /* Decrement the occurrence count of each element in the product
6722 by the count found above, and remove this many copies of each
6723 factor from OPS. */
6724 for (i = j; i < vec_len; i++)
6726 unsigned k = power;
6727 unsigned n;
6729 rf1 = &repeat_factor_vec[i];
6730 rf1->count -= power;
6732 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6734 if (oe->op == rf1->factor)
6736 if (oe->count <= k)
6738 ops->ordered_remove (n);
6739 k -= oe->count;
6741 if (k == 0)
6742 break;
6744 else
6746 oe->count -= k;
6747 break;
6754 /* At this point all elements in the repeated factor vector have a
6755 remaining occurrence count of 0 or 1, and those with a count of 1
6756 don't have cached representatives. Re-sort the ops vector and
6757 clean up. */
6758 ops->qsort (sort_by_operand_rank);
6759 repeat_factor_vec.release ();
6761 /* Return the final product computed herein. Note that there may
6762 still be some elements with single occurrence count left in OPS;
6763 those will be handled by the normal reassociation logic. */
6764 return result;
6767 /* Attempt to optimize
6768 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6769 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
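/* With illustrative constants: 2.0 * copysign (3.0, y) becomes
   copysign (6.0, y), while -2.0 * copysign (3.0, y) becomes
   -copysign (-6.0, y), i.e. the call with the folded constant, negated.  */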
6771 static void
6772 attempt_builtin_copysign (vec<operand_entry *> *ops)
6774 operand_entry *oe;
6775 unsigned int i;
6776 unsigned int length = ops->length ();
6777 tree cst = ops->last ()->op;
6779 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6780 return;
6782 FOR_EACH_VEC_ELT (*ops, i, oe)
6784 if (TREE_CODE (oe->op) == SSA_NAME
6785 && has_single_use (oe->op))
6787 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6788 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6790 tree arg0, arg1;
6791 switch (gimple_call_combined_fn (old_call))
6793 CASE_CFN_COPYSIGN:
6794 CASE_CFN_COPYSIGN_FN:
6795 arg0 = gimple_call_arg (old_call, 0);
6796 arg1 = gimple_call_arg (old_call, 1);
6797 /* The first argument of copysign must be a constant,
6798 otherwise there's nothing to do. */
6799 if (TREE_CODE (arg0) == REAL_CST)
6801 tree type = TREE_TYPE (arg0);
6802 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6803 /* If we couldn't fold to a single constant, skip it.
6804 That happens e.g. for inexact multiplication when
6805 -frounding-math. */
6806 if (mul == NULL_TREE)
6807 break;
6808 /* Instead of adjusting OLD_CALL, let's build a new
6809 call to not leak the LHS and prevent keeping bogus
6810 debug statements. DCE will clean up the old call. */
6811 gcall *new_call;
6812 if (gimple_call_internal_p (old_call))
6813 new_call = gimple_build_call_internal
6814 (IFN_COPYSIGN, 2, mul, arg1);
6815 else
6816 new_call = gimple_build_call
6817 (gimple_call_fndecl (old_call), 2, mul, arg1);
6818 tree lhs = make_ssa_name (type);
6819 gimple_call_set_lhs (new_call, lhs);
6820 gimple_set_location (new_call,
6821 gimple_location (old_call));
6822 insert_stmt_after (new_call, old_call);
6823 /* We've used the constant, get rid of it. */
6824 ops->pop ();
6825 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6826 /* Handle the CST1 < 0 case by negating the result. */
6827 if (cst1_neg)
6829 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6830 gimple *negate_stmt
6831 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6832 insert_stmt_after (negate_stmt, new_call);
6833 oe->op = negrhs;
6835 else
6836 oe->op = lhs;
6837 if (dump_file && (dump_flags & TDF_DETAILS))
6839 fprintf (dump_file, "Optimizing copysign: ");
6840 print_generic_expr (dump_file, cst);
6841 fprintf (dump_file, " * COPYSIGN (");
6842 print_generic_expr (dump_file, arg0);
6843 fprintf (dump_file, ", ");
6844 print_generic_expr (dump_file, arg1);
6845 fprintf (dump_file, ") into %sCOPYSIGN (",
6846 cst1_neg ? "-" : "");
6847 print_generic_expr (dump_file, mul);
6848 fprintf (dump_file, ", ");
6849 print_generic_expr (dump_file, arg1);
6850 fprintf (dump_file, ")\n");
6852 return;
6854 break;
6855 default:
6856 break;
6863 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6865 static void
6866 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6868 tree rhs1;
6870 if (dump_file && (dump_flags & TDF_DETAILS))
6872 fprintf (dump_file, "Transforming ");
6873 print_gimple_stmt (dump_file, stmt, 0);
6876 rhs1 = gimple_assign_rhs1 (stmt);
6877 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6878 update_stmt (stmt);
6879 remove_visited_stmt_chain (rhs1);
6881 if (dump_file && (dump_flags & TDF_DETAILS))
6883 fprintf (dump_file, " into ");
6884 print_gimple_stmt (dump_file, stmt, 0);
6888 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6890 static void
6891 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6892 tree rhs1, tree rhs2)
6894 if (dump_file && (dump_flags & TDF_DETAILS))
6896 fprintf (dump_file, "Transforming ");
6897 print_gimple_stmt (dump_file, stmt, 0);
6900 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6901 update_stmt (gsi_stmt (*gsi));
6902 remove_visited_stmt_chain (rhs1);
6904 if (dump_file && (dump_flags & TDF_DETAILS))
6906 fprintf (dump_file, " into ");
6907 print_gimple_stmt (dump_file, stmt, 0);
6911 /* Rearranging ops can expose more FMAs when the chain has more than 2 FMAs.
6912 Put no-mult ops and mult ops alternately at the end of the queue, which is
6913 conducive to generating more FMAs and reducing the loss of FMAs when breaking
6914 the chain.
6915 E.g.
6916 a * b + c * d + e generates:
6918 _4 = c_9(D) * d_10(D);
6919 _12 = .FMA (a_7(D), b_8(D), _4);
6920 _11 = e_6(D) + _12;
6922 Rearrange ops to -> e + a * b + c * d generates:
6924 _4 = .FMA (c_7(D), d_8(D), _3);
6925 _11 = .FMA (a_5(D), b_6(D), _4);
6927 Return the number of MULT_EXPRs in the chain. */
6928 static int
6929 rank_ops_for_fma (vec<operand_entry *> *ops)
6931 operand_entry *oe;
6932 unsigned int i;
6933 unsigned int ops_length = ops->length ();
6934 auto_vec<operand_entry *> ops_mult;
6935 auto_vec<operand_entry *> ops_others;
6937 FOR_EACH_VEC_ELT (*ops, i, oe)
6939 if (TREE_CODE (oe->op) == SSA_NAME)
6941 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6942 if (is_gimple_assign (def_stmt))
6944 if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6945 ops_mult.safe_push (oe);
6946 /* A negate on the multiplication leads to FNMA. */
6947 else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6948 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
6950 gimple *neg_def_stmt
6951 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
6952 if (is_gimple_assign (neg_def_stmt)
6953 && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
6954 && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
6955 ops_mult.safe_push (oe);
6956 else
6957 ops_others.safe_push (oe);
6959 else
6960 ops_others.safe_push (oe);
6962 else
6963 ops_others.safe_push (oe);
6965 else
6966 ops_others.safe_push (oe);
6968 /* 1. When ops_mult.length == 2, like the following case,
6970 a * b + c * d + e.
6972 we need to rearrange the ops.
6974 Putting ops that are not defined by a mult in front can generate more FMAs.
6976 2. If all ops are defined with mult, we don't need to rearrange them. */
6977 unsigned mult_num = ops_mult.length ();
6978 if (mult_num >= 2 && mult_num != ops_length)
6980 /* Put no-mult ops and mult ops alternately at the end of the
6981 queue, which is conducive to generating more FMAs and reducing the
6982 loss of FMAs when breaking the chain. */
6983 ops->truncate (0);
6984 ops->splice (ops_mult);
6985 int j, opindex = ops->length ();
6986 int others_length = ops_others.length ();
6987 for (j = 0; j < others_length; j++)
6989 oe = ops_others.pop ();
6990 ops->quick_insert (opindex, oe);
6991 if (opindex > 0)
6992 opindex--;
6995 return mult_num;
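/* To make the interleaving concrete (hypothetical operands): with
   ops_mult == {m1, m2} and ops_others == {a, b}, the loop above rebuilds
   OPS as {m1, a, m2, b}, so mult and no-mult operands alternate at the end
   of the queue as described in the function comment.  */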
6997 /* Reassociate expressions in basic block BB and its post-dominator
6998 children.
7000 Bubble up return status from maybe_optimize_range_tests. */
7002 static bool
7003 reassociate_bb (basic_block bb)
7005 gimple_stmt_iterator gsi;
7006 gimple *stmt = last_nondebug_stmt (bb);
7007 bool cfg_cleanup_needed = false;
7009 if (stmt && !gimple_visited_p (stmt))
7010 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
7012 bool do_prev = false;
7013 for (gsi = gsi_last_bb (bb);
7014 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
7016 do_prev = true;
7017 stmt = gsi_stmt (gsi);
7019 if (is_gimple_assign (stmt)
7020 && !stmt_could_throw_p (cfun, stmt))
7022 tree lhs, rhs1, rhs2;
7023 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
7025 /* If this was part of an already processed statement,
7026 we don't need to touch it again. */
7027 if (gimple_visited_p (stmt))
7029 /* This statement might have become dead because of previous
7030 reassociations. */
7031 if (has_zero_uses (gimple_get_lhs (stmt)))
7033 reassoc_remove_stmt (&gsi);
7034 release_defs (stmt);
7035 /* We might end up removing the last stmt above which
7036 places the iterator to the end of the sequence.
7037 Reset it to the last stmt in this case and make sure
7038 we don't do gsi_prev in that case. */
7039 if (gsi_end_p (gsi))
7041 gsi = gsi_last_bb (bb);
7042 do_prev = false;
7045 continue;
7048 /* If this is not a gimple binary expression, there is
7049 nothing for us to do with it. */
7050 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
7051 continue;
7053 lhs = gimple_assign_lhs (stmt);
7054 rhs1 = gimple_assign_rhs1 (stmt);
7055 rhs2 = gimple_assign_rhs2 (stmt);
7057 /* For non-bit or min/max operations we can't associate
7058 all types. Verify that here. */
7059 if ((rhs_code != BIT_IOR_EXPR
7060 && rhs_code != BIT_AND_EXPR
7061 && rhs_code != BIT_XOR_EXPR
7062 && rhs_code != MIN_EXPR
7063 && rhs_code != MAX_EXPR
7064 && !can_reassociate_type_p (TREE_TYPE (lhs)))
7065 || !can_reassociate_op_p (rhs1)
7066 || !can_reassociate_op_p (rhs2))
7067 continue;
7069 if (associative_tree_code (rhs_code))
7071 auto_vec<operand_entry *> ops;
7072 tree powi_result = NULL_TREE;
7073 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
7075 /* There may be no immediate uses left by the time we
7076 get here because we may have eliminated them all. */
7077 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
7078 continue;
7080 gimple_set_visited (stmt, true);
7081 linearize_expr_tree (&ops, stmt, true, true);
7082 ops.qsort (sort_by_operand_rank);
7083 int orig_len = ops.length ();
7084 optimize_ops_list (rhs_code, &ops);
7085 if (undistribute_ops_list (rhs_code, &ops,
7086 loop_containing_stmt (stmt)))
7088 ops.qsort (sort_by_operand_rank);
7089 optimize_ops_list (rhs_code, &ops);
7091 if (undistribute_bitref_for_vector (rhs_code, &ops,
7092 loop_containing_stmt (stmt)))
7094 ops.qsort (sort_by_operand_rank);
7095 optimize_ops_list (rhs_code, &ops);
7097 if (rhs_code == PLUS_EXPR
7098 && transform_add_to_multiply (&ops))
7099 ops.qsort (sort_by_operand_rank);
7101 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
7103 if (is_vector)
7104 optimize_vec_cond_expr (rhs_code, &ops);
7105 else
7106 optimize_range_tests (rhs_code, &ops, NULL);
7109 if (rhs_code == MULT_EXPR && !is_vector)
7111 attempt_builtin_copysign (&ops);
7113 if (reassoc_insert_powi_p
7114 && (flag_unsafe_math_optimizations
7115 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
7116 powi_result = attempt_builtin_powi (stmt, &ops);
7119 operand_entry *last;
7120 bool negate_result = false;
7121 if (ops.length () > 1
7122 && rhs_code == MULT_EXPR)
7124 last = ops.last ();
7125 if ((integer_minus_onep (last->op)
7126 || real_minus_onep (last->op))
7127 && !HONOR_SNANS (TREE_TYPE (lhs))
7128 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
7129 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
7131 ops.pop ();
7132 negate_result = true;
7136 tree new_lhs = lhs;
7137 /* If the operand vector is now empty, all operands were
7138 consumed by the __builtin_powi optimization. */
7139 if (ops.length () == 0)
7140 transform_stmt_to_copy (&gsi, stmt, powi_result);
7141 else if (ops.length () == 1)
7143 tree last_op = ops.last ()->op;
7145 /* If the stmt that defines operand has to be inserted, insert it
7146 before the use. */
7147 if (ops.last ()->stmt_to_insert)
7148 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
7149 if (powi_result)
7150 transform_stmt_to_multiply (&gsi, stmt, last_op,
7151 powi_result);
7152 else
7153 transform_stmt_to_copy (&gsi, stmt, last_op);
7155 else
7157 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7158 int ops_num = ops.length ();
7159 int width = 0;
7160 int mult_num = 0;
7162 /* For binary bit operations, if there are at least 3
7163 operands and the last operand in OPS is a constant,
7164 move it to the front. This helps ensure that we generate
7165 (X & Y) & C rather than (X & C) & Y. The former will
7166 often match a canonical bit test when we get to RTL. */
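/* For example (hypothetical operands), for x & y & 0x10 the constant is
   swapped to the front of OPS so that we end up generating
   (x & y) & 0x10 rather than (x & 0x10) & y.  */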
7167 if (ops.length () > 2
7168 && (rhs_code == BIT_AND_EXPR
7169 || rhs_code == BIT_IOR_EXPR
7170 || rhs_code == BIT_XOR_EXPR)
7171 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7172 std::swap (*ops[0], *ops[ops_num - 1]);
7174 optimization_type opt_type = bb_optimization_type (bb);
7176 /* If the target supports FMA, rank_ops_for_fma will detect whether
7177 the chain has FMAs and rearrange the ops if so.
7178 if (direct_internal_fn_supported_p (IFN_FMA,
7179 TREE_TYPE (lhs),
7180 opt_type)
7181 && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7183 mult_num = rank_ops_for_fma (&ops);
7186 /* Only rewrite the expression tree into parallel form in the
7187 last reassoc pass, to avoid useless back-and-forth work
7188 with the initial linearization. */
7189 bool has_fma = mult_num >= 2 && mult_num != ops_num;
7190 if (!reassoc_insert_powi_p
7191 && ops.length () > 3
7192 && (width = get_reassociation_width (&ops, mult_num, lhs,
7193 rhs_code, mode))
7194 > 1)
7196 if (dump_file && (dump_flags & TDF_DETAILS))
7197 fprintf (dump_file,
7198 "Width = %d was chosen for reassociation\n",
7199 width);
7200 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7201 width,
7202 has_fma,
7203 ops);
7205 else
7207 /* When there are three operands left, we want
7208 to make sure the ones that get the double
7209 binary op are chosen wisely. */
7210 int len = ops.length ();
7211 if (len >= 3
7212 && (!has_fma
7213 /* width > 1 means ranking ops results in better
7214 parallelism. Check current value to avoid
7215 calling get_reassociation_width again. */
7216 || (width != 1
7217 && get_reassociation_width (
7218 &ops, mult_num, lhs, rhs_code, mode)
7219 > 1)))
7220 swap_ops_for_binary_stmt (ops, len - 3);
7222 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7223 powi_result != NULL
7224 || negate_result,
7225 len != orig_len);
7228 /* If we combined some repeated factors into a
7229 __builtin_powi call, multiply that result by the
7230 reassociated operands. */
7231 if (powi_result)
7233 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7234 tree type = TREE_TYPE (lhs);
7235 tree target_ssa = make_temp_ssa_name (type, NULL,
7236 "reassocpow");
7237 gimple_set_lhs (lhs_stmt, target_ssa);
7238 update_stmt (lhs_stmt);
7239 if (lhs != new_lhs)
7241 target_ssa = new_lhs;
7242 new_lhs = lhs;
7244 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7245 powi_result, target_ssa);
7246 gimple_set_location (mul_stmt, gimple_location (stmt));
7247 gimple_set_uid (mul_stmt, gimple_uid (stmt));
7248 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7252 if (negate_result)
7254 stmt = SSA_NAME_DEF_STMT (lhs);
7255 tree tmp = make_ssa_name (TREE_TYPE (lhs));
7256 gimple_set_lhs (stmt, tmp);
7257 if (lhs != new_lhs)
7258 tmp = new_lhs;
7259 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7260 tmp);
7261 gimple_set_uid (neg_stmt, gimple_uid (stmt));
7262 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7263 update_stmt (stmt);
7269 return cfg_cleanup_needed;
7272 /* Add jumps around shifts for range tests turned into bit tests.
7273 For each SSA_NAME VAR we have code like:
7274 VAR = ...; // final stmt of range comparison
7275 // bit test here...;
7276 OTHERVAR = ...; // final stmt of the bit test sequence
7277 RES = VAR | OTHERVAR;
7278 Turn the above into:
7279 VAR = ...;
7280 if (VAR != 0)
7281 goto <l3>;
7282 else
7283 goto <l2>;
7284 <l2>:
7285 // bit test here...;
7286 OTHERVAR = ...;
7287 <l3>:
7288 # RES = PHI<1(l1), OTHERVAR(l2)>; */
7290 static void
7291 branch_fixup (void)
7293 tree var;
7294 unsigned int i;
7296 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7298 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7299 gimple *use_stmt;
7300 use_operand_p use;
7301 bool ok = single_imm_use (var, &use, &use_stmt);
7302 gcc_assert (ok
7303 && is_gimple_assign (use_stmt)
7304 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7305 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7307 basic_block cond_bb = gimple_bb (def_stmt);
7308 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7309 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7311 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7312 gimple *g = gimple_build_cond (NE_EXPR, var,
7313 build_zero_cst (TREE_TYPE (var)),
7314 NULL_TREE, NULL_TREE);
7315 location_t loc = gimple_location (use_stmt);
7316 gimple_set_location (g, loc);
7317 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7319 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7320 etrue->probability = profile_probability::even ();
7321 edge efalse = find_edge (cond_bb, then_bb);
7322 efalse->flags = EDGE_FALSE_VALUE;
7323 efalse->probability -= etrue->probability;
7324 then_bb->count -= etrue->count ();
7326 tree othervar = NULL_TREE;
7327 if (gimple_assign_rhs1 (use_stmt) == var)
7328 othervar = gimple_assign_rhs2 (use_stmt);
7329 else if (gimple_assign_rhs2 (use_stmt) == var)
7330 othervar = gimple_assign_rhs1 (use_stmt);
7331 else
7332 gcc_unreachable ();
7333 tree lhs = gimple_assign_lhs (use_stmt);
7334 gphi *phi = create_phi_node (lhs, merge_bb);
7335 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7336 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7337 gsi = gsi_for_stmt (use_stmt);
7338 gsi_remove (&gsi, true);
7340 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7341 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7343 reassoc_branch_fixups.release ();
7346 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7347 void debug_ops_vector (vec<operand_entry *> ops);
7349 /* Dump the operand entry vector OPS to FILE. */
7351 void
7352 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7354 operand_entry *oe;
7355 unsigned int i;
7357 FOR_EACH_VEC_ELT (ops, i, oe)
7359 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7360 print_generic_expr (file, oe->op);
7361 fprintf (file, "\n");
7365 /* Dump the operand entry vector OPS to STDERR. */
7367 DEBUG_FUNCTION void
7368 debug_ops_vector (vec<operand_entry *> ops)
7370 dump_ops_vector (stderr, ops);
7373 /* Bubble up return status from reassociate_bb. */
7375 static bool
7376 do_reassoc ()
7378 bool cfg_cleanup_needed = false;
7379 basic_block *worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
7381 unsigned sp = 0;
7382 for (auto son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
7383 son; son = next_dom_son (CDI_DOMINATORS, son))
7384 worklist[sp++] = son;
7385 while (sp)
7387 basic_block bb = worklist[--sp];
7388 break_up_subtract_bb (bb);
7389 for (auto son = first_dom_son (CDI_DOMINATORS, bb);
7390 son; son = next_dom_son (CDI_DOMINATORS, son))
7391 worklist[sp++] = son;
7394 for (auto son = first_dom_son (CDI_POST_DOMINATORS,
7395 EXIT_BLOCK_PTR_FOR_FN (cfun));
7396 son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7397 worklist[sp++] = son;
7398 while (sp)
7400 basic_block bb = worklist[--sp];
7401 cfg_cleanup_needed |= reassociate_bb (bb);
7402 for (auto son = first_dom_son (CDI_POST_DOMINATORS, bb);
7403 son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7404 worklist[sp++] = son;
7407 free (worklist);
7408 return cfg_cleanup_needed;
7411 /* Initialize the reassociation pass. */
7413 static void
7414 init_reassoc (void)
7416 int i;
7417 int64_t rank = 2;
7418 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7420 /* Find the loops, so that we can prevent moving calculations in
7421 them. */
7422 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7424 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7426 next_operand_entry_id = 0;
7428 /* Reverse RPO (Reverse Post Order) will give us something where
7429 deeper loops come later. */
7430 pre_and_rev_post_order_compute (NULL, bbs, false);
7431 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7432 operand_rank = new hash_map<tree, int64_t>;
7434 /* Give each default definition a distinct rank. This includes
7435 parameters and the static chain. Walk backwards over all
7436 SSA names so that we get proper rank ordering according
7437 to tree_swap_operands_p. */
7438 for (i = num_ssa_names - 1; i > 0; --i)
7440 tree name = ssa_name (i);
7441 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7442 insert_operand_rank (name, ++rank);
7445 /* Set up rank for each BB */
7446 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7447 bb_rank[bbs[i]] = ++rank << 16;
7449 free (bbs);
7450 calculate_dominance_info (CDI_POST_DOMINATORS);
7451 plus_negates = vNULL;
7452 mark_ssa_maybe_undefs ();
7455 /* Cleanup after the reassociation pass, and print stats if
7456 requested. */
7458 static void
7459 fini_reassoc (void)
7461 statistics_counter_event (cfun, "Linearized",
7462 reassociate_stats.linearized);
7463 statistics_counter_event (cfun, "Constants eliminated",
7464 reassociate_stats.constants_eliminated);
7465 statistics_counter_event (cfun, "Ops eliminated",
7466 reassociate_stats.ops_eliminated);
7467 statistics_counter_event (cfun, "Statements rewritten",
7468 reassociate_stats.rewritten);
7469 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7470 reassociate_stats.pows_encountered);
7471 statistics_counter_event (cfun, "Built-in powi calls created",
7472 reassociate_stats.pows_created);
7474 delete operand_rank;
7475 bitmap_clear (biased_names);
7476 operand_entry_pool.release ();
7477 free (bb_rank);
7478 plus_negates.release ();
7479 free_dominance_info (CDI_POST_DOMINATORS);
7480 loop_optimizer_finalize ();
7483 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7484 insertion of __builtin_powi calls.
7486 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7487 optimization of a gimple conditional. Otherwise returns zero. */
7489 static unsigned int
7490 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7492 reassoc_insert_powi_p = insert_powi_p;
7493 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7495 init_reassoc ();
7497 bool cfg_cleanup_needed;
7498 cfg_cleanup_needed = do_reassoc ();
7499 repropagate_negates ();
7500 branch_fixup ();
7502 fini_reassoc ();
7503 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7506 namespace {
7508 const pass_data pass_data_reassoc =
7510 GIMPLE_PASS, /* type */
7511 "reassoc", /* name */
7512 OPTGROUP_NONE, /* optinfo_flags */
7513 TV_TREE_REASSOC, /* tv_id */
7514 ( PROP_cfg | PROP_ssa ), /* properties_required */
7515 0, /* properties_provided */
7516 0, /* properties_destroyed */
7517 0, /* todo_flags_start */
7518 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7521 class pass_reassoc : public gimple_opt_pass
7523 public:
7524 pass_reassoc (gcc::context *ctxt)
7525 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7528 /* opt_pass methods: */
7529 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7530 void set_pass_param (unsigned int n, bool param) final override
7532 gcc_assert (n == 0);
7533 insert_powi_p = param;
7534 bias_loop_carried_phi_ranks_p = !param;
7536 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7537 unsigned int execute (function *) final override
7539 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7542 private:
7543 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7544 point 3a in the pass header comment. */
7545 bool insert_powi_p;
7546 bool bias_loop_carried_phi_ranks_p;
7547 }; // class pass_reassoc
7549 } // anon namespace
7551 gimple_opt_pass *
7552 make_pass_reassoc (gcc::context *ctxt)
7554 return new pass_reassoc (ctxt);