gcc/tree-ssa-math-opts.cc

   1 /* Global, SSA-based optimizations using mathematical identities.
   2    Copyright (C) 2005-2023 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
  21    operations.  These are common in sequences such as this one:
  22
  23         modulus = sqrt(x*x + y*y + z*z);
  24         x = x / modulus;
  25         y = y / modulus;
  26         z = z / modulus;
  27
  28    that can be optimized to
  29
  30         modulus = sqrt(x*x + y*y + z*z);
  31         rmodulus = 1.0 / modulus;
  32         x = x * rmodulus;
  33         y = y * rmodulus;
  34         z = z * rmodulus;
  35
  36    We do this for loop invariant divisors, and with this pass whenever
  37    we notice that a division has the same divisor multiple times.
  38
  39    Of course, like in PRE, we don't insert a division if a dominator
  40    already has one.  However, this cannot be done as an extension of
  41    PRE for several reasons.
  42
  43    First of all, with some experiments it was found out that the
  44    transformation is not always useful if there are only two divisions
  45    by the same divisor.  This is probably because modern processors
  46    can pipeline the divisions; on older, in-order processors it should
  47    still be effective to optimize two divisions by the same number.
  48    We make this a param, and it shall be called N in the remainder of
  49    this comment.
  50
  51    Second, if trapping math is active, we have less freedom on where
  52    to insert divisions: we can only do so in basic blocks that already
  53    contain one.  (If divisions don't trap, instead, we can insert
  54    divisions elsewhere, which will be in blocks that are common dominators
  55    of those that have the division).
  56
  57    We really don't want to compute the reciprocal unless a division will
  58    be found.  To do this, we won't insert the division in a basic block
  59    that has less than N divisions *post-dominating* it.
  60
  61    The algorithm constructs a subset of the dominator tree, holding the
  62    blocks containing the divisions and the common dominators to them,
  63    and walk it twice.  The first walk is in post-order, and it annotates
  64    each block with the number of divisions that post-dominate it: this
  65    gives information on where divisions can be inserted profitably.
  66    The second walk is in pre-order, and it inserts divisions as explained
  67    above, and replaces divisions by multiplications.
  68
  69    In the best case, the cost of the pass is O(n_statements).  In the
  70    worst-case, the cost is due to creating the dominator tree subset,
  71    with a cost of O(n_basic_blocks ^ 2); however this can only happen
  72    for n_statements / n_basic_blocks statements.  So, the amortized cost
  73    of creating the dominator tree subset is O(n_basic_blocks) and the
  74    worst-case cost of the pass is O(n_statements * n_basic_blocks).
  75
  76    More practically, the cost will be small because there are few
  77    divisions, and they tend to be in the same basic block, so insert_bb
  78    is called very few times.
  79
  80    If we did this using domwalk.cc, an efficient implementation would have
  81    to work on all the variables in a single pass, because we could not
  82    work on just a subset of the dominator tree, as we do now, and the
  83    cost would also be something like O(n_statements * n_basic_blocks).
  84    The data structures would be more complex in order to work on all the
  85    variables in a single pass.  */
  86
  87 #include "config.h"
  88 #include "system.h"
  89 #include "coretypes.h"
  90 #include "backend.h"
  91 #include "target.h"
  92 #include "rtl.h"
  93 #include "tree.h"
  94 #include "gimple.h"
  95 #include "predict.h"
  96 #include "alloc-pool.h"
  97 #include "tree-pass.h"
  98 #include "ssa.h"
  99 #include "optabs-tree.h"
 100 #include "gimple-pretty-print.h"
 101 #include "alias.h"
 102 #include "fold-const.h"
 103 #include "gimple-iterator.h"
 104 #include "gimple-fold.h"
 105 #include "gimplify.h"
 106 #include "gimplify-me.h"
 107 #include "stor-layout.h"
 108 #include "tree-cfg.h"
 109 #include "tree-dfa.h"
 110 #include "tree-ssa.h"
 111 #include "builtins.h"
 112 #include "internal-fn.h"
 113 #include "case-cfn-macros.h"
 114 #include "optabs-libfuncs.h"
 115 #include "tree-eh.h"
 116 #include "targhooks.h"
 117 #include "domwalk.h"
 118 #include "tree-ssa-math-opts.h"
 119
 120 /* This structure represents one basic block that either computes a
 121    division, or is a common dominator for basic block that compute a
 122    division.  */
 123 struct occurrence {
 124   /* The basic block represented by this structure.  */
 125   basic_block bb = basic_block();
 126
 127   /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
 128      inserted in BB.  */
 129   tree recip_def = tree();
 130
 131   /* If non-NULL, the SSA_NAME holding the definition for a squared
 132      reciprocal inserted in BB.  */
 133   tree square_recip_def = tree();
 134
 135   /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
 136      was inserted in BB.  */
 137   gimple *recip_def_stmt = nullptr;
 138
 139   /* Pointer to a list of "struct occurrence"s for blocks dominated
 140      by BB.  */
 141   struct occurrence *children = nullptr;
 142
 143   /* Pointer to the next "struct occurrence"s in the list of blocks
 144      sharing a common dominator.  */
 145   struct occurrence *next = nullptr;
 146
 147   /* The number of divisions that are in BB before compute_merit.  The
 148      number of divisions that are in BB or post-dominate it after
 149      compute_merit.  */
 150   int num_divisions = 0;
 151
 152   /* True if the basic block has a division, false if it is a common
 153      dominator for basic blocks that do.  If it is false and trapping
 154      math is active, BB is not a candidate for inserting a reciprocal.  */
 155   bool bb_has_division = false;
 156
 157   /* Construct a struct occurrence for basic block BB, and whose
 158      children list is headed by CHILDREN.  */
 159   occurrence (basic_block bb, struct occurrence *children)
 160   : bb (bb), children (children)
 161   {
 162     bb->aux = this;
 163   }
 164
 165   /* Destroy a struct occurrence and remove it from its basic block.  */
 166   ~occurrence ()
 167   {
 168     bb->aux = nullptr;
 169   }
 170
 171   /* Allocate memory for a struct occurrence from OCC_POOL.  */
 172   static void* operator new (size_t);
 173
 174   /* Return memory for a struct occurrence to OCC_POOL.  */
 175   static void operator delete (void*, size_t);
 176 };
 177
 178 static struct
 179 {
 180   /* Number of 1.0/X ops inserted.  */
 181   int rdivs_inserted;
 182
 183   /* Number of 1.0/FUNC ops inserted.  */
 184   int rfuncs_inserted;
 185 } reciprocal_stats;
 186
 187 static struct
 188 {
 189   /* Number of cexpi calls inserted.  */
 190   int inserted;
 191
 192   /* Number of conversions removed.  */
 193   int conv_removed;
 194
 195 } sincos_stats;
 196
 197 static struct
 198 {
 199   /* Number of widening multiplication ops inserted.  */
 200   int widen_mults_inserted;
 201
 202   /* Number of integer multiply-and-accumulate ops inserted.  */
 203   int maccs_inserted;
 204
 205   /* Number of fp fused multiply-add ops inserted.  */
 206   int fmas_inserted;
 207
 208   /* Number of divmod calls inserted.  */
 209   int divmod_calls_inserted;
 210
 211   /* Number of highpart multiplication ops inserted.  */
 212   int highpart_mults_inserted;
 213 } widen_mul_stats;
 214
 215 /* The instance of "struct occurrence" representing the highest
 216    interesting block in the dominator tree.  */
 217 static struct occurrence *occ_head;
 218
 219 /* Allocation pool for getting instances of "struct occurrence".  */
 220 static object_allocator<occurrence> *occ_pool;
 221
 222 void* occurrence::operator new (size_t n)
 223 {
 224   gcc_assert (n == sizeof(occurrence));
 225   return occ_pool->allocate_raw ();
 226 }
 227
 228 void occurrence::operator delete (void *occ, size_t n)
 229 {
 230   gcc_assert (n == sizeof(occurrence));
 231   occ_pool->remove_raw (occ);
 232 }
 233
 234 /* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
 235    list of "struct occurrence"s, one per basic block, having IDOM as
 236    their common dominator.
 237
 238    We try to insert NEW_OCC as deep as possible in the tree, and we also
 239    insert any other block that is a common dominator for BB and one
 240    block already in the tree.  */
 241
 242 static void
 243 insert_bb (struct occurrence *new_occ, basic_block idom,
 244            struct occurrence **p_head)
 245 {
 246   struct occurrence *occ, **p_occ;
 247
 248   for (p_occ = p_head; (occ = *p_occ) != NULL; )
 249     {
 250       basic_block bb = new_occ->bb, occ_bb = occ->bb;
 251       basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
 252       if (dom == bb)
 253         {
 254           /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
 255              from its list.  */
 256           *p_occ = occ->next;
 257           occ->next = new_occ->children;
 258           new_occ->children = occ;
 259
 260           /* Try the next block (it may as well be dominated by BB).  */
 261         }
 262
 263       else if (dom == occ_bb)
 264         {
 265           /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
 266           insert_bb (new_occ, dom, &occ->children);
 267           return;
 268         }
 269
 270       else if (dom != idom)
 271         {
 272           gcc_assert (!dom->aux);
 273
 274           /* There is a dominator between IDOM and BB, add it and make
 275              two children out of NEW_OCC and OCC.  First, remove OCC from
 276              its list.  */
 277           *p_occ = occ->next;
 278           new_occ->next = occ;
 279           occ->next = NULL;
 280
 281           /* None of the previous blocks has DOM as a dominator: if we tail
 282              recursed, we would reexamine them uselessly. Just switch BB with
 283              DOM, and go on looking for blocks dominated by DOM.  */
 284           new_occ = new occurrence (dom, new_occ);
 285         }
 286
 287       else
 288         {
 289           /* Nothing special, go on with the next element.  */
 290           p_occ = &occ->next;
 291         }
 292     }
 293
 294   /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
 295   new_occ->next = *p_head;
 296   *p_head = new_occ;
 297 }
 298
 299 /* Register that we found a division in BB.
 300    IMPORTANCE is a measure of how much weighting to give
 301    that division.  Use IMPORTANCE = 2 to register a single
 302    division.  If the division is going to be found multiple
 303    times use 1 (as it is with squares).  */
 304
 305 static inline void
 306 register_division_in (basic_block bb, int importance)
 307 {
 308   struct occurrence *occ;
 309
 310   occ = (struct occurrence *) bb->aux;
 311   if (!occ)
 312     {
 313       occ = new occurrence (bb, NULL);
 314       insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
 315     }
 316
 317   occ->bb_has_division = true;
 318   occ->num_divisions += importance;
 319 }
 320
 321
 322 /* Compute the number of divisions that postdominate each block in OCC and
 323    its children.  */
 324
 325 static void
 326 compute_merit (struct occurrence *occ)
 327 {
 328   struct occurrence *occ_child;
 329   basic_block dom = occ->bb;
 330
 331   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 332     {
 333       basic_block bb;
 334       if (occ_child->children)
 335         compute_merit (occ_child);
 336
 337       if (flag_exceptions)
 338         bb = single_noncomplex_succ (dom);
 339       else
 340         bb = dom;
 341
 342       if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
 343         occ->num_divisions += occ_child->num_divisions;
 344     }
 345 }
 346
 347
 348 /* Return whether USE_STMT is a floating-point division by DEF.  */
 349 static inline bool
 350 is_division_by (gimple *use_stmt, tree def)
 351 {
 352   return is_gimple_assign (use_stmt)
 353          && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 354          && gimple_assign_rhs2 (use_stmt) == def
 355          /* Do not recognize x / x as valid division, as we are getting
 356             confused later by replacing all immediate uses x in such
 357             a stmt.  */
 358          && gimple_assign_rhs1 (use_stmt) != def
 359          && !stmt_can_throw_internal (cfun, use_stmt);
 360 }
 361
 362 /* Return TRUE if USE_STMT is a multiplication of DEF by A.  */
 363 static inline bool
 364 is_mult_by (gimple *use_stmt, tree def, tree a)
 365 {
 366   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 367       && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
 368     {
 369       tree op0 = gimple_assign_rhs1 (use_stmt);
 370       tree op1 = gimple_assign_rhs2 (use_stmt);
 371
 372       return (op0 == def && op1 == a)
 373               || (op0 == a && op1 == def);
 374     }
 375   return 0;
 376 }
 377
 378 /* Return whether USE_STMT is DEF * DEF.  */
 379 static inline bool
 380 is_square_of (gimple *use_stmt, tree def)
 381 {
 382   return is_mult_by (use_stmt, def, def);
 383 }
 384
 385 /* Return whether USE_STMT is a floating-point division by
 386    DEF * DEF.  */
 387 static inline bool
 388 is_division_by_square (gimple *use_stmt, tree def)
 389 {
 390   if (gimple_code (use_stmt) == GIMPLE_ASSIGN
 391       && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
 392       && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
 393       && !stmt_can_throw_internal (cfun, use_stmt))
 394     {
 395       tree denominator = gimple_assign_rhs2 (use_stmt);
 396       if (TREE_CODE (denominator) == SSA_NAME)
 397         return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
 398     }
 399   return 0;
 400 }
 401
 402 /* Walk the subset of the dominator tree rooted at OCC, setting the
 403    RECIP_DEF field to a definition of 1.0 / DEF that can be used in
 404    the given basic block.  The field may be left NULL, of course,
 405    if it is not possible or profitable to do the optimization.
 406
 407    DEF_BSI is an iterator pointing at the statement defining DEF.
 408    If RECIP_DEF is set, a dominator already has a computation that can
 409    be used.
 410
 411    If should_insert_square_recip is set, then this also inserts
 412    the square of the reciprocal immediately after the definition
 413    of the reciprocal.  */
 414
 415 static void
 416 insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
 417                     tree def, tree recip_def, tree square_recip_def,
 418                     int should_insert_square_recip, int threshold)
 419 {
 420   tree type;
 421   gassign *new_stmt, *new_square_stmt;
 422   gimple_stmt_iterator gsi;
 423   struct occurrence *occ_child;
 424
 425   if (!recip_def
 426       && (occ->bb_has_division || !flag_trapping_math)
 427       /* Divide by two as all divisions are counted twice in
 428          the costing loop.  */
 429       && occ->num_divisions / 2 >= threshold)
 430     {
 431       /* Make a variable with the replacement and substitute it.  */
 432       type = TREE_TYPE (def);
 433       recip_def = create_tmp_reg (type, "reciptmp");
 434       new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
 435                                       build_one_cst (type), def);
 436
 437       if (should_insert_square_recip)
 438         {
 439           square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
 440           new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
 441                                                  recip_def, recip_def);
 442         }
 443
 444       if (occ->bb_has_division)
 445         {
 446           /* Case 1: insert before an existing division.  */
 447           gsi = gsi_after_labels (occ->bb);
 448           while (!gsi_end_p (gsi)
 449                  && (!is_division_by (gsi_stmt (gsi), def))
 450                  && (!is_division_by_square (gsi_stmt (gsi), def)))
 451             gsi_next (&gsi);
 452
 453           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 454           if (should_insert_square_recip)
 455             gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
 456         }
 457       else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
 458         {
 459           /* Case 2: insert right after the definition.  Note that this will
 460              never happen if the definition statement can throw, because in
 461              that case the sole successor of the statement's basic block will
 462              dominate all the uses as well.  */
 463           gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 464           if (should_insert_square_recip)
 465             gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
 466         }
 467       else
 468         {
 469           /* Case 3: insert in a basic block not containing defs/uses.  */
 470           gsi = gsi_after_labels (occ->bb);
 471           gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
 472           if (should_insert_square_recip)
 473             gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
 474         }
 475
 476       reciprocal_stats.rdivs_inserted++;
 477
 478       occ->recip_def_stmt = new_stmt;
 479     }
 480
 481   occ->recip_def = recip_def;
 482   occ->square_recip_def = square_recip_def;
 483   for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
 484     insert_reciprocals (def_gsi, occ_child, def, recip_def,
 485                         square_recip_def, should_insert_square_recip,
 486                         threshold);
 487 }
 488
 489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
 490    Take as argument the use for (x * x).  */
 491 static inline void
 492 replace_reciprocal_squares (use_operand_p use_p)
 493 {
 494   gimple *use_stmt = USE_STMT (use_p);
 495   basic_block bb = gimple_bb (use_stmt);
 496   struct occurrence *occ = (struct occurrence *) bb->aux;
 497
 498   if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
 499       && occ->recip_def)
 500     {
 501       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 502       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 503       gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
 504       SET_USE (use_p, occ->square_recip_def);
 505       fold_stmt_inplace (&gsi);
 506       update_stmt (use_stmt);
 507     }
 508 }
 509
 510
 511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
 512    possible.  */
 513
 514 static inline void
 515 replace_reciprocal (use_operand_p use_p)
 516 {
 517   gimple *use_stmt = USE_STMT (use_p);
 518   basic_block bb = gimple_bb (use_stmt);
 519   struct occurrence *occ = (struct occurrence *) bb->aux;
 520
 521   if (optimize_bb_for_speed_p (bb)
 522       && occ->recip_def && use_stmt != occ->recip_def_stmt)
 523     {
 524       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
 525       gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
 526       SET_USE (use_p, occ->recip_def);
 527       fold_stmt_inplace (&gsi);
 528       update_stmt (use_stmt);
 529     }
 530 }
 531
 532
 533 /* Free OCC and return one more "struct occurrence" to be freed.  */
 534
 535 static struct occurrence *
 536 free_bb (struct occurrence *occ)
 537 {
 538   struct occurrence *child, *next;
 539
 540   /* First get the two pointers hanging off OCC.  */
 541   next = occ->next;
 542   child = occ->children;
 543   delete occ;
 544
 545   /* Now ensure that we don't recurse unless it is necessary.  */
 546   if (!child)
 547     return next;
 548   else
 549     {
 550       while (next)
 551         next = free_bb (next);
 552
 553       return child;
 554     }
 555 }
 556
 557 /* Transform sequences like
 558    t = sqrt (a)
 559    x = 1.0 / t;
 560    r1 = x * x;
 561    r2 = a * x;
 562    into:
 563    t = sqrt (a)
 564    r1 = 1.0 / a;
 565    r2 = t;
 566    x = r1 * r2;
 567    depending on the uses of x, r1, r2.  This removes one multiplication and
 568    allows the sqrt and division operations to execute in parallel.
 569    DEF_GSI is the gsi of the initial division by sqrt that defines
 570    DEF (x in the example above).  */
 571
 572 static void
 573 optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
 574 {
 575   gimple *use_stmt;
 576   imm_use_iterator use_iter;
 577   gimple *stmt = gsi_stmt (*def_gsi);
 578   tree x = def;
 579   tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
 580   tree div_rhs1 = gimple_assign_rhs1 (stmt);
 581
 582   if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
 583       || TREE_CODE (div_rhs1) != REAL_CST
 584       || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
 585     return;
 586
 587   gcall *sqrt_stmt
 588     = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));
 589
 590   if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
 591     return;
 592
 593   switch (gimple_call_combined_fn (sqrt_stmt))
 594     {
 595     CASE_CFN_SQRT:
 596     CASE_CFN_SQRT_FN:
 597       break;
 598
 599     default:
 600       return;
 601     }
 602   tree a = gimple_call_arg (sqrt_stmt, 0);
 603
 604   /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */
 605
 606   /* Statements that use x in x * x.  */
 607   auto_vec<gimple *> sqr_stmts;
 608   /* Statements that use x in a * x.  */
 609   auto_vec<gimple *> mult_stmts;
 610   bool has_other_use = false;
 611   bool mult_on_main_path = false;
 612
 613   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
 614     {
 615       if (is_gimple_debug (use_stmt))
 616         continue;
 617       if (is_square_of (use_stmt, x))
 618         {
 619           sqr_stmts.safe_push (use_stmt);
 620           if (gimple_bb (use_stmt) == gimple_bb (stmt))
 621             mult_on_main_path = true;
 622         }
 623       else if (is_mult_by (use_stmt, x, a))
 624         {
 625           mult_stmts.safe_push (use_stmt);
 626           if (gimple_bb (use_stmt) == gimple_bb (stmt))
 627             mult_on_main_path = true;
 628         }
 629       else
 630         has_other_use = true;
 631     }
 632
 633   /* In the x * x and a * x cases we just rewire stmt operands or
 634      remove multiplications.  In the has_other_use case we introduce
 635      a multiplication so make sure we don't introduce a multiplication
 636      on a path where there was none.  */
 637   if (has_other_use && !mult_on_main_path)
 638     return;
 639
 640   if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
 641     return;
 642
 643   /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
 644      to be able to compose it from the sqr and mult cases.  */
 645   if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
 646     return;
 647
 648   if (dump_file)
 649     {
 650       fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
 651       print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
 652       print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 653       fprintf (dump_file, "\n");
 654     }
 655
 656   bool delete_div = !has_other_use;
 657   tree sqr_ssa_name = NULL_TREE;
 658   if (!sqr_stmts.is_empty ())
 659     {
 660       /* r1 = x * x.  Transform the original
 661          x = 1.0 / t
 662          into
 663          tmp1 = 1.0 / a
 664          r1 = tmp1.  */
 665
 666       sqr_ssa_name
 667         = make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");
 668
 669       if (dump_file)
 670         {
 671           fprintf (dump_file, "Replacing original division\n");
 672           print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 673           fprintf (dump_file, "with new division\n");
 674         }
 675       stmt
 676         = gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
 677                                gimple_assign_rhs1 (stmt), a);
 678       gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
 679       gsi_remove (def_gsi, true);
 680       *def_gsi = gsi_for_stmt (stmt);
 681       fold_stmt_inplace (def_gsi);
 682       update_stmt (stmt);
 683
 684       if (dump_file)
 685         print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
 686
 687       delete_div = false;
 688       gimple *sqr_stmt;
 689       unsigned int i;
 690       FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
 691         {
 692           gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
 693           gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
 694           update_stmt (sqr_stmt);
 695         }
 696     }
 697   if (!mult_stmts.is_empty ())
 698     {
 699       /* r2 = a * x.  Transform this into:
 700          r2 = t (The original sqrt (a)).  */
 701       unsigned int i;
 702       gimple *mult_stmt = NULL;
 703       FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
 704         {
 705           gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);
 706
 707           if (dump_file)
 708             {
 709               fprintf (dump_file, "Replacing squaring multiplication\n");
 710               print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
 711               fprintf (dump_file, "with assignment\n");
 712             }
 713           gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
 714           fold_stmt_inplace (&gsi2);
 715           update_stmt (mult_stmt);
 716           if (dump_file)
 717             print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
 718       }
 719     }
 720
 721   if (has_other_use)
 722     {
 723       /* Using the two temporaries tmp1, tmp2 from above
 724          the original x is now:
 725          x = tmp1 * tmp2.  */
 726       gcc_assert (orig_sqrt_ssa_name);
 727       gcc_assert (sqr_ssa_name);
 728
 729       gimple *new_stmt
 730         = gimple_build_assign (x, MULT_EXPR,
 731                                orig_sqrt_ssa_name, sqr_ssa_name);
 732       gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
 733       update_stmt (stmt);
 734     }
 735   else if (delete_div)
 736     {
 737       /* Remove the original division.  */
 738       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
 739       gsi_remove (&gsi2, true);
 740       release_defs (stmt);
 741     }
 742   else
 743     release_ssa_name (x);
 744 }
 745
 746 /* Look for floating-point divisions among DEF's uses, and try to
 747    replace them by multiplications with the reciprocal.  Add
 748    as many statements computing the reciprocal as needed.
 749
 750    DEF must be a GIMPLE register of a floating-point type.  */
 751
 752 static void
 753 execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
 754 {
 755   use_operand_p use_p, square_use_p;
 756   imm_use_iterator use_iter, square_use_iter;
 757   tree square_def;
 758   struct occurrence *occ;
 759   int count = 0;
 760   int threshold;
 761   int square_recip_count = 0;
 762   int sqrt_recip_count = 0;
 763
 764   gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
 765   threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));
 766
 767   /* If DEF is a square (x * x), count the number of divisions by x.
 768      If there are more divisions by x than by (DEF * DEF), prefer to optimize
 769      the reciprocal of x instead of DEF.  This improves cases like:
 770        def = x * x
 771        t0 = a / def
 772        t1 = b / def
 773        t2 = c / x
 774      Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
 775   gimple *def_stmt = SSA_NAME_DEF_STMT (def);
 776
 777   if (is_gimple_assign (def_stmt)
 778       && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
 779       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
 780       && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
 781     {
 782       tree op0 = gimple_assign_rhs1 (def_stmt);
 783
 784       FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
 785         {
 786           gimple *use_stmt = USE_STMT (use_p);
 787           if (is_division_by (use_stmt, op0))
 788             sqrt_recip_count++;
 789         }
 790     }
 791
 792   FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
 793     {
 794       gimple *use_stmt = USE_STMT (use_p);
 795       if (is_division_by (use_stmt, def))
 796         {
 797           register_division_in (gimple_bb (use_stmt), 2);
 798           count++;
 799         }
 800
 801       if (is_square_of (use_stmt, def))
 802         {
 803           square_def = gimple_assign_lhs (use_stmt);
 804           FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
 805             {
 806               gimple *square_use_stmt = USE_STMT (square_use_p);
 807               if (is_division_by (square_use_stmt, square_def))
 808                 {
 809                   /* This is executed twice for each division by a square.  */
 810                   register_division_in (gimple_bb (square_use_stmt), 1);
 811                   square_recip_count++;
 812                 }
 813             }
 814         }
 815     }
 816
 817   /* Square reciprocals were counted twice above.  */
 818   square_recip_count /= 2;
 819
 820   /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
 821   if (sqrt_recip_count > square_recip_count)
 822     goto out;
 823
 824   /* Do the expensive part only if we can hope to optimize something.  */
 825   if (count + square_recip_count >= threshold && count >= 1)
 826     {
 827       gimple *use_stmt;
 828       for (occ = occ_head; occ; occ = occ->next)
 829         {
 830           compute_merit (occ);
 831           insert_reciprocals (def_gsi, occ, def, NULL, NULL,
 832                               square_recip_count, threshold);
 833         }
 834
 835       FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
 836         {
 837           if (is_division_by (use_stmt, def))
 838             {
 839               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 840                 replace_reciprocal (use_p);
 841             }
 842           else if (square_recip_count > 0 && is_square_of (use_stmt, def))
 843             {
 844               FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
 845                 {
 846                   /* Find all uses of the square that are divisions and
 847                    * replace them by multiplications with the inverse.  */
 848                   imm_use_iterator square_iterator;
 849                   gimple *powmult_use_stmt = USE_STMT (use_p);
 850                   tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);
 851
 852                   FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
 853                                          square_iterator, powmult_def_name)
 854                     FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
 855                       {
 856                         gimple *powmult_use_stmt = USE_STMT (square_use_p);
 857                         if (is_division_by (powmult_use_stmt, powmult_def_name))
 858                           replace_reciprocal_squares (square_use_p);
 859                       }
 860                 }
 861             }
 862         }
 863     }
 864
 865 out:
 866   for (occ = occ_head; occ; )
 867     occ = free_bb (occ);
 868
 869   occ_head = NULL;
 870 }
 871
 872 /* Return an internal function that implements the reciprocal of CALL,
 873    or IFN_LAST if there is no such function that the target supports.  */
 874
 875 internal_fn
 876 internal_fn_reciprocal (gcall *call)
 877 {
 878   internal_fn ifn;
 879
 880   switch (gimple_call_combined_fn (call))
 881     {
 882     CASE_CFN_SQRT:
 883     CASE_CFN_SQRT_FN:
 884       ifn = IFN_RSQRT;
 885       break;
 886
 887     default:
 888       return IFN_LAST;
 889     }
 890
 891   tree_pair types = direct_internal_fn_types (ifn, call);
 892   if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
 893     return IFN_LAST;
 894
 895   return ifn;
 896 }
 897
 898 /* Go through all the floating-point SSA_NAMEs, and call
 899    execute_cse_reciprocals_1 on each of them.  */
 900 namespace {
 901
 902 const pass_data pass_data_cse_reciprocals =
 903 {
 904   GIMPLE_PASS, /* type */
 905   "recip", /* name */
 906   OPTGROUP_NONE, /* optinfo_flags */
 907   TV_TREE_RECIP, /* tv_id */
 908   PROP_ssa, /* properties_required */
 909   0, /* properties_provided */
 910   0, /* properties_destroyed */
 911   0, /* todo_flags_start */
 912   TODO_update_ssa, /* todo_flags_finish */
 913 };
 914
 915 class pass_cse_reciprocals : public gimple_opt_pass
 916 {
 917 public:
 918   pass_cse_reciprocals (gcc::context *ctxt)
 919     : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
 920   {}
 921
 922   /* opt_pass methods: */
 923   bool gate (function *) final override
 924   {
 925     return optimize && flag_reciprocal_math;
 926   }
 927   unsigned int execute (function *) final override;
 928
 929 }; // class pass_cse_reciprocals
 930
 931 unsigned int
 932 pass_cse_reciprocals::execute (function *fun)
 933 {
 934   basic_block bb;
 935   tree arg;
 936
 937   occ_pool = new object_allocator<occurrence> ("dominators for recip");
 938
 939   memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
 940   calculate_dominance_info (CDI_DOMINATORS);
 941   calculate_dominance_info (CDI_POST_DOMINATORS);
 942
 943   if (flag_checking)
 944     FOR_EACH_BB_FN (bb, fun)
 945       gcc_assert (!bb->aux);
 946
 947   for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
 948     if (FLOAT_TYPE_P (TREE_TYPE (arg))
 949         && is_gimple_reg (arg))
 950       {
 951         tree name = ssa_default_def (fun, arg);
 952         if (name)
 953           execute_cse_reciprocals_1 (NULL, name);
 954       }
 955
 956   FOR_EACH_BB_FN (bb, fun)
 957     {
 958       tree def;
 959
 960       for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
 961            gsi_next (&gsi))
 962         {
 963           gphi *phi = gsi.phi ();
 964           def = PHI_RESULT (phi);
 965           if (! virtual_operand_p (def)
 966               && FLOAT_TYPE_P (TREE_TYPE (def)))
 967             execute_cse_reciprocals_1 (NULL, def);
 968         }
 969
 970       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 971            gsi_next (&gsi))
 972         {
 973           gimple *stmt = gsi_stmt (gsi);
 974
 975           if (gimple_has_lhs (stmt)
 976               && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
 977               && FLOAT_TYPE_P (TREE_TYPE (def))
 978               && TREE_CODE (def) == SSA_NAME)
 979             {
 980               execute_cse_reciprocals_1 (&gsi, def);
 981               stmt = gsi_stmt (gsi);
 982               if (flag_unsafe_math_optimizations
 983                   && is_gimple_assign (stmt)
 984                   && gimple_assign_lhs (stmt) == def
 985                   && !stmt_can_throw_internal (cfun, stmt)
 986                   && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
 987                 optimize_recip_sqrt (&gsi, def);
 988             }
 989         }
 990
 991       if (optimize_bb_for_size_p (bb))
 992         continue;
 993
 994       /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
 995       for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
 996            gsi_next (&gsi))
 997         {
 998           gimple *stmt = gsi_stmt (gsi);
 999
1000           if (is_gimple_assign (stmt)
1001               && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
1002             {
1003               tree arg1 = gimple_assign_rhs2 (stmt);
1004               gimple *stmt1;
1005
1006               if (TREE_CODE (arg1) != SSA_NAME)
1007                 continue;
1008
1009               stmt1 = SSA_NAME_DEF_STMT (arg1);
1010
1011               if (is_gimple_call (stmt1)
1012                   && gimple_call_lhs (stmt1))
1013                 {
1014                   bool fail;
1015                   imm_use_iterator ui;
1016                   use_operand_p use_p;
1017                   tree fndecl = NULL_TREE;
1018
1019                   gcall *call = as_a <gcall *> (stmt1);
1020                   internal_fn ifn = internal_fn_reciprocal (call);
1021                   if (ifn == IFN_LAST)
1022                     {
1023                       fndecl = gimple_call_fndecl (call);
1024                       if (!fndecl
1025                           || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
1026                         continue;
1027                       fndecl = targetm.builtin_reciprocal (fndecl);
1028                       if (!fndecl)
1029                         continue;
1030                     }
1031
1032                   /* Check that all uses of the SSA name are divisions,
1033                      otherwise replacing the defining statement will do
1034                      the wrong thing.  */
1035                   fail = false;
1036                   FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
1037                     {
1038                       gimple *stmt2 = USE_STMT (use_p);
1039                       if (is_gimple_debug (stmt2))
1040                         continue;
1041                       if (!is_gimple_assign (stmt2)
1042                           || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
1043                           || gimple_assign_rhs1 (stmt2) == arg1
1044                           || gimple_assign_rhs2 (stmt2) != arg1)
1045                         {
1046                           fail = true;
1047                           break;
1048                         }
1049                     }
1050                   if (fail)
1051                     continue;
1052
1053                   gimple_replace_ssa_lhs (call, arg1);
1054                   if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
1055                     {
1056                       auto_vec<tree, 4> args;
1057                       for (unsigned int i = 0;
1058                            i < gimple_call_num_args (call); i++)
1059                         args.safe_push (gimple_call_arg (call, i));
1060                       gcall *stmt2;
1061                       if (ifn == IFN_LAST)
1062                         stmt2 = gimple_build_call_vec (fndecl, args);
1063                       else
1064                         stmt2 = gimple_build_call_internal_vec (ifn, args);
1065                       gimple_call_set_lhs (stmt2, arg1);
1066                       gimple_move_vops (stmt2, call);
1067                       gimple_call_set_nothrow (stmt2,
1068                                                gimple_call_nothrow_p (call));
1069                       gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
1070                       gsi_replace (&gsi2, stmt2, true);
1071                     }
1072                   else
1073                     {
1074                       if (ifn == IFN_LAST)
1075                         gimple_call_set_fndecl (call, fndecl);
1076                       else
1077                         gimple_call_set_internal_fn (call, ifn);
1078                       update_stmt (call);
1079                     }
1080                   reciprocal_stats.rfuncs_inserted++;
1081
1082                   FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
1083                     {
1084                       gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1085                       gimple_assign_set_rhs_code (stmt, MULT_EXPR);
1086                       fold_stmt_inplace (&gsi);
1087                       update_stmt (stmt);
1088                     }
1089                 }
1090             }
1091         }
1092     }
1093
1094   statistics_counter_event (fun, "reciprocal divs inserted",
1095                             reciprocal_stats.rdivs_inserted);
1096   statistics_counter_event (fun, "reciprocal functions inserted",
1097                             reciprocal_stats.rfuncs_inserted);
1098
1099   free_dominance_info (CDI_DOMINATORS);
1100   free_dominance_info (CDI_POST_DOMINATORS);
1101   delete occ_pool;
1102   return 0;
1103 }
1104
1105 } // anon namespace
1106
1107 gimple_opt_pass *
1108 make_pass_cse_reciprocals (gcc::context *ctxt)
1109 {
1110   return new pass_cse_reciprocals (ctxt);
1111 }
1112
1113 /* If NAME is the result of a type conversion, look for other
1114    equivalent dominating or dominated conversions, and replace all
1115    uses with the earliest dominating name, removing the redundant
1116    conversions.  Return the prevailing name.  */
1117
1118 static tree
1119 execute_cse_conv_1 (tree name, bool *cfg_changed)
1120 {
1121   if (SSA_NAME_IS_DEFAULT_DEF (name)
1122       || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
1123     return name;
1124
1125   gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1126
1127   if (!gimple_assign_cast_p (def_stmt))
1128     return name;
1129
1130   tree src = gimple_assign_rhs1 (def_stmt);
1131
1132   if (TREE_CODE (src) != SSA_NAME)
1133     return name;
1134
1135   imm_use_iterator use_iter;
1136   gimple *use_stmt;
1137
1138   /* Find the earliest dominating def.    */
1139   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1140     {
1141       if (use_stmt == def_stmt
1142           || !gimple_assign_cast_p (use_stmt))
1143         continue;
1144
1145       tree lhs = gimple_assign_lhs (use_stmt);
1146
1147       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1148           || (gimple_assign_rhs1 (use_stmt)
1149               != gimple_assign_rhs1 (def_stmt))
1150           || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1151         continue;
1152
1153       bool use_dominates;
1154       if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
1155         {
1156           gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1157           while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
1158             gsi_next (&gsi);
1159           use_dominates = !gsi_end_p (gsi);
1160         }
1161       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
1162                                gimple_bb (def_stmt)))
1163         use_dominates = false;
1164       else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
1165                                gimple_bb (use_stmt)))
1166         use_dominates = true;
1167       else
1168         continue;
1169
1170       if (use_dominates)
1171         {
1172           std::swap (name, lhs);
1173           std::swap (def_stmt, use_stmt);
1174         }
1175     }
1176
1177   /* Now go through all uses of SRC again, replacing the equivalent
1178      dominated conversions.  We may replace defs that were not
1179      dominated by the then-prevailing defs when we first visited
1180      them.  */
1181   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
1182     {
1183       if (use_stmt == def_stmt
1184           || !gimple_assign_cast_p (use_stmt))
1185         continue;
1186
1187       tree lhs = gimple_assign_lhs (use_stmt);
1188
1189       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
1190           || (gimple_assign_rhs1 (use_stmt)
1191               != gimple_assign_rhs1 (def_stmt))
1192           || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
1193         continue;
1194
1195       basic_block use_bb = gimple_bb (use_stmt);
1196       if (gimple_bb (def_stmt) == use_bb
1197           || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
1198         {
1199           sincos_stats.conv_removed++;
1200
1201           gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
1202           replace_uses_by (lhs, name);
1203           if (gsi_remove (&gsi, true)
1204               && gimple_purge_dead_eh_edges (use_bb))
1205             *cfg_changed = true;
1206           release_defs (use_stmt);
1207         }
1208     }
1209
1210   return name;
1211 }
1212
1213 /* Records an occurrence at statement USE_STMT in the vector of trees
1214    STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1215    is not yet initialized.  Returns true if the occurrence was pushed on
1216    the vector.  Adjusts *TOP_BB to be the basic block dominating all
1217    statements in the vector.  */
1218
1219 static bool
1220 maybe_record_sincos (vec<gimple *> *stmts,
1221                      basic_block *top_bb, gimple *use_stmt)
1222 {
1223   basic_block use_bb = gimple_bb (use_stmt);
1224   if (*top_bb
1225       && (*top_bb == use_bb
1226           || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
1227     stmts->safe_push (use_stmt);
1228   else if (!*top_bb
1229            || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
1230     {
1231       stmts->safe_push (use_stmt);
1232       *top_bb = use_bb;
1233     }
1234   else
1235     return false;
1236
1237   return true;
1238 }
1239
1240 /* Look for sin, cos and cexpi calls with the same argument NAME and
1241    create a single call to cexpi CSEing the result in this case.
1242    We first walk over all immediate uses of the argument collecting
1243    statements that we can CSE in a vector and in a second pass replace
1244    the statement rhs with a REALPART or IMAGPART expression on the
1245    result of the cexpi call we insert before the use statement that
1246    dominates all other candidates.  */
1247
1248 static bool
1249 execute_cse_sincos_1 (tree name)
1250 {
1251   gimple_stmt_iterator gsi;
1252   imm_use_iterator use_iter;
1253   tree fndecl, res, type = NULL_TREE;
1254   gimple *def_stmt, *use_stmt, *stmt;
1255   int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
1256   auto_vec<gimple *> stmts;
1257   basic_block top_bb = NULL;
1258   int i;
1259   bool cfg_changed = false;
1260
1261   name = execute_cse_conv_1 (name, &cfg_changed);
1262
1263   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
1264     {
1265       if (gimple_code (use_stmt) != GIMPLE_CALL
1266           || !gimple_call_lhs (use_stmt))
1267         continue;
1268
1269       switch (gimple_call_combined_fn (use_stmt))
1270         {
1271         CASE_CFN_COS:
1272           seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1273           break;
1274
1275         CASE_CFN_SIN:
1276           seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1277           break;
1278
1279         CASE_CFN_CEXPI:
1280           seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
1281           break;
1282
1283         default:;
1284           continue;
1285         }
1286
1287       tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
1288       if (!type)
1289         {
1290           type = t;
1291           t = TREE_TYPE (name);
1292         }
1293       /* This checks that NAME has the right type in the first round,
1294          and, in subsequent rounds, that the built_in type is the same
1295          type, or a compatible type.  */
1296       if (type != t && !types_compatible_p (type, t))
1297         return false;
1298     }
1299   if (seen_cos + seen_sin + seen_cexpi <= 1)
1300     return false;
1301
1302   /* Simply insert cexpi at the beginning of top_bb but not earlier than
1303      the name def statement.  */
1304   fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
1305   if (!fndecl)
1306     return false;
1307   stmt = gimple_build_call (fndecl, 1, name);
1308   res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
1309   gimple_call_set_lhs (stmt, res);
1310
1311   def_stmt = SSA_NAME_DEF_STMT (name);
1312   if (!SSA_NAME_IS_DEFAULT_DEF (name)
1313       && gimple_code (def_stmt) != GIMPLE_PHI
1314       && gimple_bb (def_stmt) == top_bb)
1315     {
1316       gsi = gsi_for_stmt (def_stmt);
1317       gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1318     }
1319   else
1320     {
1321       gsi = gsi_after_labels (top_bb);
1322       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1323     }
1324   sincos_stats.inserted++;
1325
1326   /* And adjust the recorded old call sites.  */
1327   for (i = 0; stmts.iterate (i, &use_stmt); ++i)
1328     {
1329       tree rhs = NULL;
1330
1331       switch (gimple_call_combined_fn (use_stmt))
1332         {
1333         CASE_CFN_COS:
1334           rhs = fold_build1 (REALPART_EXPR, type, res);
1335           break;
1336
1337         CASE_CFN_SIN:
1338           rhs = fold_build1 (IMAGPART_EXPR, type, res);
1339           break;
1340
1341         CASE_CFN_CEXPI:
1342           rhs = res;
1343           break;
1344
1345         default:;
1346           gcc_unreachable ();
1347         }
1348
1349         /* Replace call with a copy.  */
1350         stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);
1351
1352         gsi = gsi_for_stmt (use_stmt);
1353         gsi_replace (&gsi, stmt, true);
1354         if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
1355           cfg_changed = true;
1356     }
1357
1358   return cfg_changed;
1359 }
1360
1361 /* To evaluate powi(x,n), the floating point value x raised to the
1362    constant integer exponent n, we use a hybrid algorithm that
1363    combines the "window method" with look-up tables.  For an
1364    introduction to exponentiation algorithms and "addition chains",
1365    see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1366    "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1367    3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1368    Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */
1369
1370 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1371    multiplications to inline before calling the system library's pow
1372    function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1373    so this default never requires calling pow, powf or powl.  */
1374
1375 #ifndef POWI_MAX_MULTS
1376 #define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
1377 #endif
1378
1379 /* The size of the "optimal power tree" lookup table.  All
1380    exponents less than this value are simply looked up in the
1381    powi_table below.  This threshold is also used to size the
1382    cache of pseudo registers that hold intermediate results.  */
1383 #define POWI_TABLE_SIZE 256
1384
1385 /* The size, in bits of the window, used in the "window method"
1386    exponentiation algorithm.  This is equivalent to a radix of
1387    (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
1388 #define POWI_WINDOW_SIZE 3
1389
1390 /* The following table is an efficient representation of an
1391    "optimal power tree".  For each value, i, the corresponding
1392    value, j, in the table states than an optimal evaluation
1393    sequence for calculating pow(x,i) can be found by evaluating
1394    pow(x,j)*pow(x,i-j).  An optimal power tree for the first
1395    100 integers is given in Knuth's "Seminumerical algorithms".  */
1396
1397 static const unsigned char powi_table[POWI_TABLE_SIZE] =
1398   {
1399       0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
1400       4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
1401       8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
1402      12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
1403      16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
1404      20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
1405      24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
1406      28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
1407      32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
1408      36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
1409      40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
1410      44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
1411      48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
1412      52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
1413      56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
1414      60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
1415      64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
1416      68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
1417      72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
1418      76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
1419      80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
1420      84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
1421      88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
1422      92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
1423      96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
1424     100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
1425     104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
1426     108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
1427     112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
1428     116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
1429     120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
1430     124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
1431   };
1432
1433
1434 /* Return the number of multiplications required to calculate
1435    powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
1436    subroutine of powi_cost.  CACHE is an array indicating
1437    which exponents have already been calculated.  */
1438
1439 static int
1440 powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
1441 {
1442   /* If we've already calculated this exponent, then this evaluation
1443      doesn't require any additional multiplications.  */
1444   if (cache[n])
1445     return 0;
1446
1447   cache[n] = true;
1448   return powi_lookup_cost (n - powi_table[n], cache)
1449          + powi_lookup_cost (powi_table[n], cache) + 1;
1450 }
1451
1452 /* Return the number of multiplications required to calculate
1453    powi(x,n) for an arbitrary x, given the exponent N.  This
1454    function needs to be kept in sync with powi_as_mults below.  */
1455
1456 static int
1457 powi_cost (HOST_WIDE_INT n)
1458 {
1459   bool cache[POWI_TABLE_SIZE];
1460   unsigned HOST_WIDE_INT digit;
1461   unsigned HOST_WIDE_INT val;
1462   int result;
1463
1464   if (n == 0)
1465     return 0;
1466
1467   /* Ignore the reciprocal when calculating the cost.  */
1468   val = absu_hwi (n);
1469
1470   /* Initialize the exponent cache.  */
1471   memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
1472   cache[1] = true;
1473
1474   result = 0;
1475
1476   while (val >= POWI_TABLE_SIZE)
1477     {
1478       if (val & 1)
1479         {
1480           digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
1481           result += powi_lookup_cost (digit, cache)
1482                     + POWI_WINDOW_SIZE + 1;
1483           val >>= POWI_WINDOW_SIZE;
1484         }
1485       else
1486         {
1487           val >>= 1;
1488           result++;
1489         }
1490     }
1491
1492   return result + powi_lookup_cost (val, cache);
1493 }
1494
1495 /* Recursive subroutine of powi_as_mults.  This function takes the
1496    array, CACHE, of already calculated exponents and an exponent N and
1497    returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */
1498
1499 static tree
1500 powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
1501                  unsigned HOST_WIDE_INT n, tree *cache)
1502 {
1503   tree op0, op1, ssa_target;
1504   unsigned HOST_WIDE_INT digit;
1505   gassign *mult_stmt;
1506
1507   if (n < POWI_TABLE_SIZE && cache[n])
1508     return cache[n];
1509
1510   ssa_target = make_temp_ssa_name (type, NULL, "powmult");
1511
1512   if (n < POWI_TABLE_SIZE)
1513     {
1514       cache[n] = ssa_target;
1515       op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
1516       op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
1517     }
1518   else if (n & 1)
1519     {
1520       digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
1521       op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
1522       op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
1523     }
1524   else
1525     {
1526       op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
1527       op1 = op0;
1528     }
1529
1530   mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
1531   gimple_set_location (mult_stmt, loc);
1532   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
1533
1534   return ssa_target;
1535 }
1536
1537 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1538    This function needs to be kept in sync with powi_cost above.  */
1539
1540 tree
1541 powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
1542                tree arg0, HOST_WIDE_INT n)
1543 {
1544   tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
1545   gassign *div_stmt;
1546   tree target;
1547
1548   if (n == 0)
1549     return build_one_cst (type);
1550
1551   memset (cache, 0, sizeof (cache));
1552   cache[1] = arg0;
1553
1554   result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
1555   if (n >= 0)
1556     return result;
1557
1558   /* If the original exponent was negative, reciprocate the result.  */
1559   target = make_temp_ssa_name (type, NULL, "powmult");
1560   div_stmt = gimple_build_assign (target, RDIV_EXPR,
1561                                   build_real (type, dconst1), result);
1562   gimple_set_location (div_stmt, loc);
1563   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
1564
1565   return target;
1566 }
1567
1568 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1569    location info LOC.  If the arguments are appropriate, create an
1570    equivalent sequence of statements prior to GSI using an optimal
1571    number of multiplications, and return an expession holding the
1572    result.  */
1573
1574 static tree
1575 gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
1576                             tree arg0, HOST_WIDE_INT n)
1577 {
1578   if ((n >= -1 && n <= 2)
1579       || (optimize_function_for_speed_p (cfun)
1580           && powi_cost (n) <= POWI_MAX_MULTS))
1581     return powi_as_mults (gsi, loc, arg0, n);
1582
1583   return NULL_TREE;
1584 }
1585
1586 /* Build a gimple call statement that calls FN with argument ARG.
1587    Set the lhs of the call statement to a fresh SSA name.  Insert the
1588    statement prior to GSI's current position, and return the fresh
1589    SSA name.  */
1590
1591 static tree
1592 build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
1593                        tree fn, tree arg)
1594 {
1595   gcall *call_stmt;
1596   tree ssa_target;
1597
1598   call_stmt = gimple_build_call (fn, 1, arg);
1599   ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
1600   gimple_set_lhs (call_stmt, ssa_target);
1601   gimple_set_location (call_stmt, loc);
1602   gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
1603
1604   return ssa_target;
1605 }
1606
1607 /* Build a gimple binary operation with the given CODE and arguments
1608    ARG0, ARG1, assigning the result to a new SSA name for variable
1609    TARGET.  Insert the statement prior to GSI's current position, and
1610    return the fresh SSA name.*/
1611
1612 static tree
1613 build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
1614                         const char *name, enum tree_code code,
1615                         tree arg0, tree arg1)
1616 {
1617   tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
1618   gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
1619   gimple_set_location (stmt, loc);
1620   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1621   return result;
1622 }
1623
1624 /* Build a gimple reference operation with the given CODE and argument
1625    ARG, assigning the result to a new SSA name of TYPE with NAME.
1626    Insert the statement prior to GSI's current position, and return
1627    the fresh SSA name.  */
1628
1629 static inline tree
1630 build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type,
1631                       const char *name, enum tree_code code, tree arg0)
1632 {
1633   tree result = make_temp_ssa_name (type, NULL, name);
1634   gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0));
1635   gimple_set_location (stmt, loc);
1636   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1637   return result;
1638 }
1639
1640 /* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
1641    prior to GSI's current position, and return the fresh SSA name.  */
1642
1643 static tree
1644 build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
1645                        tree type, tree val)
1646 {
1647   tree result = make_ssa_name (type);
1648   gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
1649   gimple_set_location (stmt, loc);
1650   gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
1651   return result;
1652 }
1653
1654 struct pow_synth_sqrt_info
1655 {
1656   bool *factors;
1657   unsigned int deepest;
1658   unsigned int num_mults;
1659 };
1660
1661 /* Return true iff the real value C can be represented as a
1662    sum of powers of 0.5 up to N.  That is:
1663    C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1664    Record in INFO the various parameters of the synthesis algorithm such
1665    as the factors a[i], the maximum 0.5 power and the number of
1666    multiplications that will be required.  */
1667
1668 bool
1669 representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
1670                                  struct pow_synth_sqrt_info *info)
1671 {
1672   REAL_VALUE_TYPE factor = dconsthalf;
1673   REAL_VALUE_TYPE remainder = c;
1674
1675   info->deepest = 0;
1676   info->num_mults = 0;
1677   memset (info->factors, 0, n * sizeof (bool));
1678
1679   for (unsigned i = 0; i < n; i++)
1680     {
1681       REAL_VALUE_TYPE res;
1682
1683       /* If something inexact happened bail out now.  */
1684       if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
1685         return false;
1686
1687       /* We have hit zero.  The number is representable as a sum
1688          of powers of 0.5.  */
1689       if (real_equal (&res, &dconst0))
1690         {
1691           info->factors[i] = true;
1692           info->deepest = i + 1;
1693           return true;
1694         }
1695       else if (!REAL_VALUE_NEGATIVE (res))
1696         {
1697           remainder = res;
1698           info->factors[i] = true;
1699           info->num_mults++;
1700         }
1701       else
1702         info->factors[i] = false;
1703
1704       real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
1705     }
1706   return false;
1707 }
1708
1709 /* Return the tree corresponding to FN being applied
1710    to ARG N times at GSI and LOC.
1711    Look up previous results from CACHE if need be.
1712    cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */
1713
1714 static tree
1715 get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
1716               tree fn, location_t loc, tree *cache)
1717 {
1718   tree res = cache[n];
1719   if (!res)
1720     {
1721       tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
1722       res = build_and_insert_call (gsi, loc, fn, prev);
1723       cache[n] = res;
1724     }
1725
1726   return res;
1727 }
1728
1729 /* Print to STREAM the repeated application of function FNAME to ARG
1730    N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
1731    "foo (foo (x))".  */
1732
1733 static void
1734 print_nested_fn (FILE* stream, const char *fname, const char* arg,
1735                  unsigned int n)
1736 {
1737   if (n == 0)
1738     fprintf (stream, "%s", arg);
1739   else
1740     {
1741       fprintf (stream, "%s (", fname);
1742       print_nested_fn (stream, fname, arg, n - 1);
1743       fprintf (stream, ")");
1744     }
1745 }
1746
1747 /* Print to STREAM the fractional sequence of sqrt chains
1748    applied to ARG, described by INFO.  Used for the dump file.  */
1749
1750 static void
1751 dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
1752                                 struct pow_synth_sqrt_info *info)
1753 {
1754   for (unsigned int i = 0; i < info->deepest; i++)
1755     {
1756       bool is_set = info->factors[i];
1757       if (is_set)
1758         {
1759           print_nested_fn (stream, "sqrt", arg, i + 1);
1760           if (i != info->deepest - 1)
1761             fprintf (stream, " * ");
1762         }
1763     }
1764 }
1765
1766 /* Print to STREAM a representation of raising ARG to an integer
1767    power N.  Used for the dump file.  */
1768
1769 static void
1770 dump_integer_part (FILE *stream, const char* arg, HOST_WIDE_INT n)
1771 {
1772   if (n > 1)
1773     fprintf (stream, "powi (%s, " HOST_WIDE_INT_PRINT_DEC ")", arg, n);
1774   else if (n == 1)
1775     fprintf (stream, "%s", arg);
1776 }
1777
1778 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1779    square roots.  Place at GSI and LOC.  Limit the maximum depth
1780    of the sqrt chains to MAX_DEPTH.  Return the tree holding the
1781    result of the expanded sequence or NULL_TREE if the expansion failed.
1782
1783    This routine assumes that ARG1 is a real number with a fractional part
1784    (the integer exponent case will have been handled earlier in
1785    gimple_expand_builtin_pow).
1786
1787    For ARG1 > 0.0:
1788    * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1789      FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1790                     FRAC_PART == ARG1 - WHOLE_PART:
1791      Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1792      POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1793      if it can be expressed as such, that is if FRAC_PART satisfies:
1794      FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1795      where integer a[i] is either 0 or 1.
1796
1797      Example:
1798      POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1799        --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1800
1801    For ARG1 < 0.0 there are two approaches:
1802    * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1803          is calculated as above.
1804
1805      Example:
1806      POW (x, -5.625) == 1.0 / POW (x, 5.625)
1807        -->  1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1808
1809    * (B) : WHOLE_PART := - ceil (abs (ARG1))
1810            FRAC_PART  := ARG1 - WHOLE_PART
1811      and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1812      Example:
1813      POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1814        --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1815
1816    For ARG1 < 0.0 we choose between (A) and (B) depending on
1817    how many multiplications we'd have to do.
1818    So, for the example in (B): POW (x, -5.875), if we were to
1819    follow algorithm (A) we would produce:
1820    1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1821    which contains more multiplications than approach (B).
1822
1823    Hopefully, this approach will eliminate potentially expensive POW library
1824    calls when unsafe floating point math is enabled and allow the compiler to
1825    further optimise the multiplies, square roots and divides produced by this
1826    function.  */
1827
1828 static tree
1829 expand_pow_as_sqrts (gimple_stmt_iterator *gsi, location_t loc,
1830                      tree arg0, tree arg1, HOST_WIDE_INT max_depth)
1831 {
1832   tree type = TREE_TYPE (arg0);
1833   machine_mode mode = TYPE_MODE (type);
1834   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
1835   bool one_over = true;
1836
1837   if (!sqrtfn)
1838     return NULL_TREE;
1839
1840   if (TREE_CODE (arg1) != REAL_CST)
1841     return NULL_TREE;
1842
1843   REAL_VALUE_TYPE exp_init = TREE_REAL_CST (arg1);
1844
1845   gcc_assert (max_depth > 0);
1846   tree *cache = XALLOCAVEC (tree, max_depth + 1);
1847
1848   struct pow_synth_sqrt_info synth_info;
1849   synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1850   synth_info.deepest = 0;
1851   synth_info.num_mults = 0;
1852
1853   bool neg_exp = REAL_VALUE_NEGATIVE (exp_init);
1854   REAL_VALUE_TYPE exp = real_value_abs (&exp_init);
1855
1856   /* The whole and fractional parts of exp.  */
1857   REAL_VALUE_TYPE whole_part;
1858   REAL_VALUE_TYPE frac_part;
1859
1860   real_floor (&whole_part, mode, &exp);
1861   real_arithmetic (&frac_part, MINUS_EXPR, &exp, &whole_part);
1862
1863
1864   REAL_VALUE_TYPE ceil_whole = dconst0;
1865   REAL_VALUE_TYPE ceil_fract = dconst0;
1866
1867   if (neg_exp)
1868     {
1869       real_ceil (&ceil_whole, mode, &exp);
1870       real_arithmetic (&ceil_fract, MINUS_EXPR, &ceil_whole, &exp);
1871     }
1872
1873   if (!representable_as_half_series_p (frac_part, max_depth, &synth_info))
1874     return NULL_TREE;
1875
1876   /* Check whether it's more profitable to not use 1.0 / ...  */
1877   if (neg_exp)
1878     {
1879       struct pow_synth_sqrt_info alt_synth_info;
1880       alt_synth_info.factors = XALLOCAVEC (bool, max_depth + 1);
1881       alt_synth_info.deepest = 0;
1882       alt_synth_info.num_mults = 0;
1883
1884       if (representable_as_half_series_p (ceil_fract, max_depth,
1885                                            &alt_synth_info)
1886           && alt_synth_info.deepest <= synth_info.deepest
1887           && alt_synth_info.num_mults < synth_info.num_mults)
1888         {
1889           whole_part = ceil_whole;
1890           frac_part = ceil_fract;
1891           synth_info.deepest = alt_synth_info.deepest;
1892           synth_info.num_mults = alt_synth_info.num_mults;
1893           memcpy (synth_info.factors, alt_synth_info.factors,
1894                   (max_depth + 1) * sizeof (bool));
1895           one_over = false;
1896         }
1897     }
1898
1899   HOST_WIDE_INT n = real_to_integer (&whole_part);
1900   REAL_VALUE_TYPE cint;
1901   real_from_integer (&cint, VOIDmode, n, SIGNED);
1902
1903   if (!real_identical (&whole_part, &cint))
1904     return NULL_TREE;
1905
1906   if (powi_cost (n) + synth_info.num_mults > POWI_MAX_MULTS)
1907     return NULL_TREE;
1908
1909   memset (cache, 0, (max_depth + 1) * sizeof (tree));
1910
1911   tree integer_res = n == 0 ? build_real (type, dconst1) : arg0;
1912
1913   /* Calculate the integer part of the exponent.  */
1914   if (n > 1)
1915     {
1916       integer_res = gimple_expand_builtin_powi (gsi, loc, arg0, n);
1917       if (!integer_res)
1918         return NULL_TREE;
1919     }
1920
1921   if (dump_file)
1922     {
1923       char string[64];
1924
1925       real_to_decimal (string, &exp_init, sizeof (string), 0, 1);
1926       fprintf (dump_file, "synthesizing pow (x, %s) as:\n", string);
1927
1928       if (neg_exp)
1929         {
1930           if (one_over)
1931             {
1932               fprintf (dump_file, "1.0 / (");
1933               dump_integer_part (dump_file, "x", n);
1934               if (n > 0)
1935                 fprintf (dump_file, " * ");
1936               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1937               fprintf (dump_file, ")");
1938             }
1939           else
1940             {
1941               dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1942               fprintf (dump_file, " / (");
1943               dump_integer_part (dump_file, "x", n);
1944               fprintf (dump_file, ")");
1945             }
1946         }
1947       else
1948         {
1949           dump_fractional_sqrt_sequence (dump_file, "x", &synth_info);
1950           if (n > 0)
1951             fprintf (dump_file, " * ");
1952           dump_integer_part (dump_file, "x", n);
1953         }
1954
1955       fprintf (dump_file, "\ndeepest sqrt chain: %d\n", synth_info.deepest);
1956     }
1957
1958
1959   tree fract_res = NULL_TREE;
1960   cache[0] = arg0;
1961
1962   /* Calculate the fractional part of the exponent.  */
1963   for (unsigned i = 0; i < synth_info.deepest; i++)
1964     {
1965       if (synth_info.factors[i])
1966         {
1967           tree sqrt_chain = get_fn_chain (arg0, i + 1, gsi, sqrtfn, loc, cache);
1968
1969           if (!fract_res)
1970               fract_res = sqrt_chain;
1971
1972           else
1973             fract_res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1974                                            fract_res, sqrt_chain);
1975         }
1976     }
1977
1978   tree res = NULL_TREE;
1979
1980   if (neg_exp)
1981     {
1982       if (one_over)
1983         {
1984           if (n > 0)
1985             res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
1986                                            fract_res, integer_res);
1987           else
1988             res = fract_res;
1989
1990           res = build_and_insert_binop (gsi, loc, "powrootrecip", RDIV_EXPR,
1991                                           build_real (type, dconst1), res);
1992         }
1993       else
1994         {
1995           res = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
1996                                          fract_res, integer_res);
1997         }
1998     }
1999   else
2000     res = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2001                                    fract_res, integer_res);
2002   return res;
2003 }
2004
2005 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2006    with location info LOC.  If possible, create an equivalent and
2007    less expensive sequence of statements prior to GSI, and return an
2008    expession holding the result.  */
2009
2010 static tree
2011 gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, location_t loc,
2012                            tree arg0, tree arg1)
2013 {
2014   REAL_VALUE_TYPE c, cint, dconst1_3, dconst1_4, dconst1_6;
2015   REAL_VALUE_TYPE c2, dconst3;
2016   HOST_WIDE_INT n;
2017   tree type, sqrtfn, cbrtfn, sqrt_arg0, result, cbrt_x, powi_cbrt_x;
2018   machine_mode mode;
2019   bool speed_p = optimize_bb_for_speed_p (gsi_bb (*gsi));
2020   bool hw_sqrt_exists, c_is_int, c2_is_int;
2021
2022   dconst1_4 = dconst1;
2023   SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
2024
2025   /* If the exponent isn't a constant, there's nothing of interest
2026      to be done.  */
2027   if (TREE_CODE (arg1) != REAL_CST)
2028     return NULL_TREE;
2029
2030   /* Don't perform the operation if flag_signaling_nans is on
2031      and the operand is a signaling NaN.  */
2032   if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
2033       && ((TREE_CODE (arg0) == REAL_CST
2034            && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0)))
2035           || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1))))
2036     return NULL_TREE;
2037
2038   /* If the exponent is equivalent to an integer, expand to an optimal
2039      multiplication sequence when profitable.  */
2040   c = TREE_REAL_CST (arg1);
2041   n = real_to_integer (&c);
2042   real_from_integer (&cint, VOIDmode, n, SIGNED);
2043   c_is_int = real_identical (&c, &cint);
2044
2045   if (c_is_int
2046       && ((n >= -1 && n <= 2)
2047           || (flag_unsafe_math_optimizations
2048               && speed_p
2049               && powi_cost (n) <= POWI_MAX_MULTS)))
2050     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
2051
2052   /* Attempt various optimizations using sqrt and cbrt.  */
2053   type = TREE_TYPE (arg0);
2054   mode = TYPE_MODE (type);
2055   sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2056
2057   /* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
2058      unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
2059      sqrt(-0) = -0.  */
2060   if (sqrtfn
2061       && real_equal (&c, &dconsthalf)
2062       && !HONOR_SIGNED_ZEROS (mode))
2063     return build_and_insert_call (gsi, loc, sqrtfn, arg0);
2064
2065   hw_sqrt_exists = optab_handler (sqrt_optab, mode) != CODE_FOR_nothing;
2066
2067   /* Optimize pow(x,1./3.) = cbrt(x).  This requires unsafe math
2068      optimizations since 1./3. is not exactly representable.  If x
2069      is negative and finite, the correct value of pow(x,1./3.) is
2070      a NaN with the "invalid" exception raised, because the value
2071      of 1./3. actually has an even denominator.  The correct value
2072      of cbrt(x) is a negative real value.  */
2073   cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
2074   dconst1_3 = real_value_truncate (mode, dconst_third ());
2075
2076   if (flag_unsafe_math_optimizations
2077       && cbrtfn
2078       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2079       && real_equal (&c, &dconst1_3))
2080     return build_and_insert_call (gsi, loc, cbrtfn, arg0);
2081
2082   /* Optimize pow(x,1./6.) = cbrt(sqrt(x)).  Don't do this optimization
2083      if we don't have a hardware sqrt insn.  */
2084   dconst1_6 = dconst1_3;
2085   SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
2086
2087   if (flag_unsafe_math_optimizations
2088       && sqrtfn
2089       && cbrtfn
2090       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2091       && speed_p
2092       && hw_sqrt_exists
2093       && real_equal (&c, &dconst1_6))
2094     {
2095       /* sqrt(x)  */
2096       sqrt_arg0 = build_and_insert_call (gsi, loc, sqrtfn, arg0);
2097
2098       /* cbrt(sqrt(x))  */
2099       return build_and_insert_call (gsi, loc, cbrtfn, sqrt_arg0);
2100     }
2101
2102
2103   /* Attempt to expand the POW as a product of square root chains.
2104      Expand the 0.25 case even when otpimising for size.  */
2105   if (flag_unsafe_math_optimizations
2106       && sqrtfn
2107       && hw_sqrt_exists
2108       && (speed_p || real_equal (&c, &dconst1_4))
2109       && !HONOR_SIGNED_ZEROS (mode))
2110     {
2111       unsigned int max_depth = speed_p
2112                                 ? param_max_pow_sqrt_depth
2113                                 : 2;
2114
2115       tree expand_with_sqrts
2116         = expand_pow_as_sqrts (gsi, loc, arg0, arg1, max_depth);
2117
2118       if (expand_with_sqrts)
2119         return expand_with_sqrts;
2120     }
2121
2122   real_arithmetic (&c2, MULT_EXPR, &c, &dconst2);
2123   n = real_to_integer (&c2);
2124   real_from_integer (&cint, VOIDmode, n, SIGNED);
2125   c2_is_int = real_identical (&c2, &cint);
2126
2127   /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2128
2129      powi(x, n/3) * powi(cbrt(x), n%3),                    n > 0;
2130      1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)),  n < 0.
2131
2132      Do not calculate the first factor when n/3 = 0.  As cbrt(x) is
2133      different from pow(x, 1./3.) due to rounding and behavior with
2134      negative x, we need to constrain this transformation to unsafe
2135      math and positive x or finite math.  */
2136   real_from_integer (&dconst3, VOIDmode, 3, SIGNED);
2137   real_arithmetic (&c2, MULT_EXPR, &c, &dconst3);
2138   real_round (&c2, mode, &c2);
2139   n = real_to_integer (&c2);
2140   real_from_integer (&cint, VOIDmode, n, SIGNED);
2141   real_arithmetic (&c2, RDIV_EXPR, &cint, &dconst3);
2142   real_convert (&c2, mode, &c2);
2143
2144   if (flag_unsafe_math_optimizations
2145       && cbrtfn
2146       && (!HONOR_NANS (mode) || tree_expr_nonnegative_p (arg0))
2147       && real_identical (&c2, &c)
2148       && !c2_is_int
2149       && optimize_function_for_speed_p (cfun)
2150       && powi_cost (n / 3) <= POWI_MAX_MULTS)
2151     {
2152       tree powi_x_ndiv3 = NULL_TREE;
2153
2154       /* Attempt to fold powi(arg0, abs(n/3)) into multiplies.  If not
2155          possible or profitable, give up.  Skip the degenerate case when
2156          abs(n) < 3, where the result is always 1.  */
2157       if (absu_hwi (n) >= 3)
2158         {
2159           powi_x_ndiv3 = gimple_expand_builtin_powi (gsi, loc, arg0,
2160                                                      abs_hwi (n / 3));
2161           if (!powi_x_ndiv3)
2162             return NULL_TREE;
2163         }
2164
2165       /* Calculate powi(cbrt(x), n%3).  Don't use gimple_expand_builtin_powi
2166          as that creates an unnecessary variable.  Instead, just produce
2167          either cbrt(x) or cbrt(x) * cbrt(x).  */
2168       cbrt_x = build_and_insert_call (gsi, loc, cbrtfn, arg0);
2169
2170       if (absu_hwi (n) % 3 == 1)
2171         powi_cbrt_x = cbrt_x;
2172       else
2173         powi_cbrt_x = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2174                                               cbrt_x, cbrt_x);
2175
2176       /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1.  */
2177       if (absu_hwi (n) < 3)
2178         result = powi_cbrt_x;
2179       else
2180         result = build_and_insert_binop (gsi, loc, "powroot", MULT_EXPR,
2181                                          powi_x_ndiv3, powi_cbrt_x);
2182
2183       /* If n is negative, reciprocate the result.  */
2184       if (n < 0)
2185         result = build_and_insert_binop (gsi, loc, "powroot", RDIV_EXPR,
2186                                          build_real (type, dconst1), result);
2187
2188       return result;
2189     }
2190
2191   /* No optimizations succeeded.  */
2192   return NULL_TREE;
2193 }
2194
2195 /* ARG is the argument to a cabs builtin call in GSI with location info
2196    LOC.  Create a sequence of statements prior to GSI that calculates
2197    sqrt(R*R + I*I), where R and I are the real and imaginary components
2198    of ARG, respectively.  Return an expression holding the result.  */
2199
2200 static tree
2201 gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree arg)
2202 {
2203   tree real_part, imag_part, addend1, addend2, sum, result;
2204   tree type = TREE_TYPE (TREE_TYPE (arg));
2205   tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
2206   machine_mode mode = TYPE_MODE (type);
2207
2208   if (!flag_unsafe_math_optimizations
2209       || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi)))
2210       || !sqrtfn
2211       || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing)
2212     return NULL_TREE;
2213
2214   real_part = build_and_insert_ref (gsi, loc, type, "cabs",
2215                                     REALPART_EXPR, arg);
2216   addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2217                                     real_part, real_part);
2218   imag_part = build_and_insert_ref (gsi, loc, type, "cabs",
2219                                     IMAGPART_EXPR, arg);
2220   addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR,
2221                                     imag_part, imag_part);
2222   sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, addend2);
2223   result = build_and_insert_call (gsi, loc, sqrtfn, sum);
2224
2225   return result;
2226 }
2227
2228 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2229    on the SSA_NAME argument of each of them.  */
2230
2231 namespace {
2232
2233 const pass_data pass_data_cse_sincos =
2234 {
2235   GIMPLE_PASS, /* type */
2236   "sincos", /* name */
2237   OPTGROUP_NONE, /* optinfo_flags */
2238   TV_TREE_SINCOS, /* tv_id */
2239   PROP_ssa, /* properties_required */
2240   0, /* properties_provided */
2241   0, /* properties_destroyed */
2242   0, /* todo_flags_start */
2243   TODO_update_ssa, /* todo_flags_finish */
2244 };
2245
2246 class pass_cse_sincos : public gimple_opt_pass
2247 {
2248 public:
2249   pass_cse_sincos (gcc::context *ctxt)
2250     : gimple_opt_pass (pass_data_cse_sincos, ctxt)
2251   {}
2252
2253   /* opt_pass methods: */
2254   bool gate (function *) final override
2255     {
2256       return optimize;
2257     }
2258
2259   unsigned int execute (function *) final override;
2260
2261 }; // class pass_cse_sincos
2262
2263 unsigned int
2264 pass_cse_sincos::execute (function *fun)
2265 {
2266   basic_block bb;
2267   bool cfg_changed = false;
2268
2269   calculate_dominance_info (CDI_DOMINATORS);
2270   memset (&sincos_stats, 0, sizeof (sincos_stats));
2271
2272   FOR_EACH_BB_FN (bb, fun)
2273     {
2274       gimple_stmt_iterator gsi;
2275
2276       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2277         {
2278           gimple *stmt = gsi_stmt (gsi);
2279
2280           if (is_gimple_call (stmt)
2281               && gimple_call_lhs (stmt))
2282             {
2283               tree arg;
2284               switch (gimple_call_combined_fn (stmt))
2285                 {
2286                 CASE_CFN_COS:
2287                 CASE_CFN_SIN:
2288                 CASE_CFN_CEXPI:
2289                   arg = gimple_call_arg (stmt, 0);
2290                   /* Make sure we have either sincos or cexp.  */
2291                   if (!targetm.libc_has_function (function_c99_math_complex,
2292                                                   TREE_TYPE (arg))
2293                       && !targetm.libc_has_function (function_sincos,
2294                                                      TREE_TYPE (arg)))
2295                     break;
2296
2297                   if (TREE_CODE (arg) == SSA_NAME)
2298                     cfg_changed |= execute_cse_sincos_1 (arg);
2299                   break;
2300                 default:
2301                   break;
2302                 }
2303             }
2304         }
2305     }
2306
2307   statistics_counter_event (fun, "sincos statements inserted",
2308                             sincos_stats.inserted);
2309   statistics_counter_event (fun, "conv statements removed",
2310                             sincos_stats.conv_removed);
2311
2312   return cfg_changed ? TODO_cleanup_cfg : 0;
2313 }
2314
2315 } // anon namespace
2316
2317 gimple_opt_pass *
2318 make_pass_cse_sincos (gcc::context *ctxt)
2319 {
2320   return new pass_cse_sincos (ctxt);
2321 }
2322
2323 /* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
2324    Also expand CABS.  */
2325 namespace {
2326
2327 const pass_data pass_data_expand_powcabs =
2328 {
2329   GIMPLE_PASS, /* type */
2330   "powcabs", /* name */
2331   OPTGROUP_NONE, /* optinfo_flags */
2332   TV_TREE_POWCABS, /* tv_id */
2333   PROP_ssa, /* properties_required */
2334   PROP_gimple_opt_math, /* properties_provided */
2335   0, /* properties_destroyed */
2336   0, /* todo_flags_start */
2337   TODO_update_ssa, /* todo_flags_finish */
2338 };
2339
2340 class pass_expand_powcabs : public gimple_opt_pass
2341 {
2342 public:
2343   pass_expand_powcabs (gcc::context *ctxt)
2344     : gimple_opt_pass (pass_data_expand_powcabs, ctxt)
2345   {}
2346
2347   /* opt_pass methods: */
2348   bool gate (function *) final override
2349     {
2350       return optimize;
2351     }
2352
2353   unsigned int execute (function *) final override;
2354
2355 }; // class pass_expand_powcabs
2356
2357 unsigned int
2358 pass_expand_powcabs::execute (function *fun)
2359 {
2360   basic_block bb;
2361   bool cfg_changed = false;
2362
2363   calculate_dominance_info (CDI_DOMINATORS);
2364
2365   FOR_EACH_BB_FN (bb, fun)
2366     {
2367       gimple_stmt_iterator gsi;
2368       bool cleanup_eh = false;
2369
2370       for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2371         {
2372           gimple *stmt = gsi_stmt (gsi);
2373
2374           /* Only the last stmt in a bb could throw, no need to call
2375              gimple_purge_dead_eh_edges if we change something in the middle
2376              of a basic block.  */
2377           cleanup_eh = false;
2378
2379           if (is_gimple_call (stmt)
2380               && gimple_call_lhs (stmt))
2381             {
2382               tree arg0, arg1, result;
2383               HOST_WIDE_INT n;
2384               location_t loc;
2385
2386               switch (gimple_call_combined_fn (stmt))
2387                 {
2388                 CASE_CFN_POW:
2389                   arg0 = gimple_call_arg (stmt, 0);
2390                   arg1 = gimple_call_arg (stmt, 1);
2391
2392                   loc = gimple_location (stmt);
2393                   result = gimple_expand_builtin_pow (&gsi, loc, arg0, arg1);
2394
2395                   if (result)
2396                     {
2397                       tree lhs = gimple_get_lhs (stmt);
2398                       gassign *new_stmt = gimple_build_assign (lhs, result);
2399                       gimple_set_location (new_stmt, loc);
2400                       unlink_stmt_vdef (stmt);
2401                       gsi_replace (&gsi, new_stmt, true);
2402                       cleanup_eh = true;
2403                       if (gimple_vdef (stmt))
2404                         release_ssa_name (gimple_vdef (stmt));
2405                     }
2406                   break;
2407
2408                 CASE_CFN_POWI:
2409                   arg0 = gimple_call_arg (stmt, 0);
2410                   arg1 = gimple_call_arg (stmt, 1);
2411                   loc = gimple_location (stmt);
2412
2413                   if (real_minus_onep (arg0))
2414                     {
2415                       tree t0, t1, cond, one, minus_one;
2416                       gassign *stmt;
2417
2418                       t0 = TREE_TYPE (arg0);
2419                       t1 = TREE_TYPE (arg1);
2420                       one = build_real (t0, dconst1);
2421                       minus_one = build_real (t0, dconstm1);
2422
2423                       cond = make_temp_ssa_name (t1, NULL, "powi_cond");
2424                       stmt = gimple_build_assign (cond, BIT_AND_EXPR,
2425                                                   arg1, build_int_cst (t1, 1));
2426                       gimple_set_location (stmt, loc);
2427                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2428
2429                       result = make_temp_ssa_name (t0, NULL, "powi");
2430                       stmt = gimple_build_assign (result, COND_EXPR, cond,
2431                                                   minus_one, one);
2432                       gimple_set_location (stmt, loc);
2433                       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
2434                     }
2435                   else
2436                     {
2437                       if (!tree_fits_shwi_p (arg1))
2438                         break;
2439
2440                       n = tree_to_shwi (arg1);
2441                       result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
2442                     }
2443
2444                   if (result)
2445                     {
2446                       tree lhs = gimple_get_lhs (stmt);
2447                       gassign *new_stmt = gimple_build_assign (lhs, result);
2448                       gimple_set_location (new_stmt, loc);
2449                       unlink_stmt_vdef (stmt);
2450                       gsi_replace (&gsi, new_stmt, true);
2451                       cleanup_eh = true;
2452                       if (gimple_vdef (stmt))
2453                         release_ssa_name (gimple_vdef (stmt));
2454                     }
2455                   break;
2456
2457                 CASE_CFN_CABS:
2458                   arg0 = gimple_call_arg (stmt, 0);
2459                   loc = gimple_location (stmt);
2460                   result = gimple_expand_builtin_cabs (&gsi, loc, arg0);
2461
2462                   if (result)
2463                     {
2464                       tree lhs = gimple_get_lhs (stmt);
2465                       gassign *new_stmt = gimple_build_assign (lhs, result);
2466                       gimple_set_location (new_stmt, loc);
2467                       unlink_stmt_vdef (stmt);
2468                       gsi_replace (&gsi, new_stmt, true);
2469                       cleanup_eh = true;
2470                       if (gimple_vdef (stmt))
2471                         release_ssa_name (gimple_vdef (stmt));
2472                     }
2473                   break;
2474
2475                 default:;
2476                 }
2477             }
2478         }
2479       if (cleanup_eh)
2480         cfg_changed |= gimple_purge_dead_eh_edges (bb);
2481     }
2482
2483   return cfg_changed ? TODO_cleanup_cfg : 0;
2484 }
2485
2486 } // anon namespace
2487
2488 gimple_opt_pass *
2489 make_pass_expand_powcabs (gcc::context *ctxt)
2490 {
2491   return new pass_expand_powcabs (ctxt);
2492 }
2493
2494 /* Return true if stmt is a type conversion operation that can be stripped
2495    when used in a widening multiply operation.  */
2496 static bool
2497 widening_mult_conversion_strippable_p (tree result_type, gimple *stmt)
2498 {
2499   enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
2500
2501   if (TREE_CODE (result_type) == INTEGER_TYPE)
2502     {
2503       tree op_type;
2504       tree inner_op_type;
2505
2506       if (!CONVERT_EXPR_CODE_P (rhs_code))
2507         return false;
2508
2509       op_type = TREE_TYPE (gimple_assign_lhs (stmt));
2510
2511       /* If the type of OP has the same precision as the result, then
2512          we can strip this conversion.  The multiply operation will be
2513          selected to create the correct extension as a by-product.  */
2514       if (TYPE_PRECISION (result_type) == TYPE_PRECISION (op_type))
2515         return true;
2516
2517       /* We can also strip a conversion if it preserves the signed-ness of
2518          the operation and doesn't narrow the range.  */
2519       inner_op_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
2520
2521       /* If the inner-most type is unsigned, then we can strip any
2522          intermediate widening operation.  If it's signed, then the
2523          intermediate widening operation must also be signed.  */
2524       if ((TYPE_UNSIGNED (inner_op_type)
2525            || TYPE_UNSIGNED (op_type) == TYPE_UNSIGNED (inner_op_type))
2526           && TYPE_PRECISION (op_type) > TYPE_PRECISION (inner_op_type))
2527         return true;
2528
2529       return false;
2530     }
2531
2532   return rhs_code == FIXED_CONVERT_EXPR;
2533 }
2534
2535 /* Return true if RHS is a suitable operand for a widening multiplication,
2536    assuming a target type of TYPE.
2537    There are two cases:
2538
2539      - RHS makes some value at least twice as wide.  Store that value
2540        in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2541
2542      - RHS is an integer constant.  Store that value in *NEW_RHS_OUT if so,
2543        but leave *TYPE_OUT untouched.  */
2544
2545 static bool
2546 is_widening_mult_rhs_p (tree type, tree rhs, tree *type_out,
2547                         tree *new_rhs_out)
2548 {
2549   gimple *stmt;
2550   tree type1, rhs1;
2551
2552   if (TREE_CODE (rhs) == SSA_NAME)
2553     {
2554       stmt = SSA_NAME_DEF_STMT (rhs);
2555       if (is_gimple_assign (stmt))
2556         {
2557           if (! widening_mult_conversion_strippable_p (type, stmt))
2558             rhs1 = rhs;
2559           else
2560             {
2561               rhs1 = gimple_assign_rhs1 (stmt);
2562
2563               if (TREE_CODE (rhs1) == INTEGER_CST)
2564                 {
2565                   *new_rhs_out = rhs1;
2566                   *type_out = NULL;
2567                   return true;
2568                 }
2569             }
2570         }
2571       else
2572         rhs1 = rhs;
2573
2574       type1 = TREE_TYPE (rhs1);
2575
2576       if (TREE_CODE (type1) != TREE_CODE (type)
2577           || TYPE_PRECISION (type1) * 2 > TYPE_PRECISION (type))
2578         return false;
2579
2580       *new_rhs_out = rhs1;
2581       *type_out = type1;
2582       return true;
2583     }
2584
2585   if (TREE_CODE (rhs) == INTEGER_CST)
2586     {
2587       *new_rhs_out = rhs;
2588       *type_out = NULL;
2589       return true;
2590     }
2591
2592   return false;
2593 }
2594
2595 /* Return true if STMT performs a widening multiplication, assuming the
2596    output type is TYPE.  If so, store the unwidened types of the operands
2597    in *TYPE1_OUT and *TYPE2_OUT respectively.  Also fill *RHS1_OUT and
2598    *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2599    and *TYPE2_OUT would give the operands of the multiplication.  */
2600
2601 static bool
2602 is_widening_mult_p (gimple *stmt,
2603                     tree *type1_out, tree *rhs1_out,
2604                     tree *type2_out, tree *rhs2_out)
2605 {
2606   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2607
2608   if (TREE_CODE (type) == INTEGER_TYPE)
2609     {
2610       if (TYPE_OVERFLOW_TRAPS (type))
2611         return false;
2612     }
2613   else if (TREE_CODE (type) != FIXED_POINT_TYPE)
2614     return false;
2615
2616   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs1 (stmt), type1_out,
2617                                rhs1_out))
2618     return false;
2619
2620   if (!is_widening_mult_rhs_p (type, gimple_assign_rhs2 (stmt), type2_out,
2621                                rhs2_out))
2622     return false;
2623
2624   if (*type1_out == NULL)
2625     {
2626       if (*type2_out == NULL || !int_fits_type_p (*rhs1_out, *type2_out))
2627         return false;
2628       *type1_out = *type2_out;
2629     }
2630
2631   if (*type2_out == NULL)
2632     {
2633       if (!int_fits_type_p (*rhs2_out, *type1_out))
2634         return false;
2635       *type2_out = *type1_out;
2636     }
2637
2638   /* Ensure that the larger of the two operands comes first. */
2639   if (TYPE_PRECISION (*type1_out) < TYPE_PRECISION (*type2_out))
2640     {
2641       std::swap (*type1_out, *type2_out);
2642       std::swap (*rhs1_out, *rhs2_out);
2643     }
2644
2645   return true;
2646 }
2647
2648 /* Check to see if the CALL statement is an invocation of copysign
2649    with 1. being the first argument.  */
2650 static bool
2651 is_copysign_call_with_1 (gimple *call)
2652 {
2653   gcall *c = dyn_cast <gcall *> (call);
2654   if (! c)
2655     return false;
2656
2657   enum combined_fn code = gimple_call_combined_fn (c);
2658
2659   if (code == CFN_LAST)
2660     return false;
2661
2662   if (builtin_fn_p (code))
2663     {
2664       switch (as_builtin_fn (code))
2665         {
2666         CASE_FLT_FN (BUILT_IN_COPYSIGN):
2667         CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
2668           return real_onep (gimple_call_arg (c, 0));
2669         default:
2670           return false;
2671         }
2672     }
2673
2674   if (internal_fn_p (code))
2675     {
2676       switch (as_internal_fn (code))
2677         {
2678         case IFN_COPYSIGN:
2679           return real_onep (gimple_call_arg (c, 0));
2680         default:
2681           return false;
2682         }
2683     }
2684
2685    return false;
2686 }
2687
2688 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2689    This only happens when the xorsign optab is defined, if the
2690    pattern is not a xorsign pattern or if expansion fails FALSE is
2691    returned, otherwise TRUE is returned.  */
2692 static bool
2693 convert_expand_mult_copysign (gimple *stmt, gimple_stmt_iterator *gsi)
2694 {
2695   tree treeop0, treeop1, lhs, type;
2696   location_t loc = gimple_location (stmt);
2697   lhs = gimple_assign_lhs (stmt);
2698   treeop0 = gimple_assign_rhs1 (stmt);
2699   treeop1 = gimple_assign_rhs2 (stmt);
2700   type = TREE_TYPE (lhs);
2701   machine_mode mode = TYPE_MODE (type);
2702
2703   if (HONOR_SNANS (type))
2704     return false;
2705
2706   if (TREE_CODE (treeop0) == SSA_NAME && TREE_CODE (treeop1) == SSA_NAME)
2707     {
2708       gimple *call0 = SSA_NAME_DEF_STMT (treeop0);
2709       if (!has_single_use (treeop0) || !is_copysign_call_with_1 (call0))
2710         {
2711           call0 = SSA_NAME_DEF_STMT (treeop1);
2712           if (!has_single_use (treeop1) || !is_copysign_call_with_1 (call0))
2713             return false;
2714
2715           treeop1 = treeop0;
2716         }
2717         if (optab_handler (xorsign_optab, mode) == CODE_FOR_nothing)
2718           return false;
2719
2720         gcall *c = as_a<gcall*> (call0);
2721         treeop0 = gimple_call_arg (c, 1);
2722
2723         gcall *call_stmt
2724           = gimple_build_call_internal (IFN_XORSIGN, 2, treeop1, treeop0);
2725         gimple_set_lhs (call_stmt, lhs);
2726         gimple_set_location (call_stmt, loc);
2727         gsi_replace (gsi, call_stmt, true);
2728         return true;
2729     }
2730
2731   return false;
2732 }
2733
2734 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2735    its rhs, and try to convert it into a WIDEN_MULT_EXPR.  The return
2736    value is true iff we converted the statement.  */
2737
2738 static bool
2739 convert_mult_to_widen (gimple *stmt, gimple_stmt_iterator *gsi)
2740 {
2741   tree lhs, rhs1, rhs2, type, type1, type2;
2742   enum insn_code handler;
2743   scalar_int_mode to_mode, from_mode, actual_mode;
2744   optab op;
2745   int actual_precision;
2746   location_t loc = gimple_location (stmt);
2747   bool from_unsigned1, from_unsigned2;
2748
2749   lhs = gimple_assign_lhs (stmt);
2750   type = TREE_TYPE (lhs);
2751   if (TREE_CODE (type) != INTEGER_TYPE)
2752     return false;
2753
2754   if (!is_widening_mult_p (stmt, &type1, &rhs1, &type2, &rhs2))
2755     return false;
2756
2757   to_mode = SCALAR_INT_TYPE_MODE (type);
2758   from_mode = SCALAR_INT_TYPE_MODE (type1);
2759   if (to_mode == from_mode)
2760     return false;
2761
2762   from_unsigned1 = TYPE_UNSIGNED (type1);
2763   from_unsigned2 = TYPE_UNSIGNED (type2);
2764
2765   if (from_unsigned1 && from_unsigned2)
2766     op = umul_widen_optab;
2767   else if (!from_unsigned1 && !from_unsigned2)
2768     op = smul_widen_optab;
2769   else
2770     op = usmul_widen_optab;
2771
2772   handler = find_widening_optab_handler_and_mode (op, to_mode, from_mode,
2773                                                   &actual_mode);
2774
2775   if (handler == CODE_FOR_nothing)
2776     {
2777       if (op != smul_widen_optab)
2778         {
2779           /* We can use a signed multiply with unsigned types as long as
2780              there is a wider mode to use, or it is the smaller of the two
2781              types that is unsigned.  Note that type1 >= type2, always.  */
2782           if ((TYPE_UNSIGNED (type1)
2783                && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2784               || (TYPE_UNSIGNED (type2)
2785                   && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2786             {
2787               if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2788                   || GET_MODE_SIZE (to_mode) <= GET_MODE_SIZE (from_mode))
2789                 return false;
2790             }
2791
2792           op = smul_widen_optab;
2793           handler = find_widening_optab_handler_and_mode (op, to_mode,
2794                                                           from_mode,
2795                                                           &actual_mode);
2796
2797           if (handler == CODE_FOR_nothing)
2798             return false;
2799
2800           from_unsigned1 = from_unsigned2 = false;
2801         }
2802       else
2803         {
2804           /* Expand can synthesize smul_widen_optab if the target
2805              supports umul_widen_optab.  */
2806           op = umul_widen_optab;
2807           handler = find_widening_optab_handler_and_mode (op, to_mode,
2808                                                           from_mode,
2809                                                           &actual_mode);
2810           if (handler == CODE_FOR_nothing)
2811             return false;
2812         }
2813     }
2814
2815   /* Ensure that the inputs to the handler are in the correct precison
2816      for the opcode.  This will be the full mode size.  */
2817   actual_precision = GET_MODE_PRECISION (actual_mode);
2818   if (2 * actual_precision > TYPE_PRECISION (type))
2819     return false;
2820   if (actual_precision != TYPE_PRECISION (type1)
2821       || from_unsigned1 != TYPE_UNSIGNED (type1))
2822     rhs1 = build_and_insert_cast (gsi, loc,
2823                                   build_nonstandard_integer_type
2824                                     (actual_precision, from_unsigned1), rhs1);
2825   if (actual_precision != TYPE_PRECISION (type2)
2826       || from_unsigned2 != TYPE_UNSIGNED (type2))
2827     rhs2 = build_and_insert_cast (gsi, loc,
2828                                   build_nonstandard_integer_type
2829                                     (actual_precision, from_unsigned2), rhs2);
2830
2831   /* Handle constants.  */
2832   if (TREE_CODE (rhs1) == INTEGER_CST)
2833     rhs1 = fold_convert (type1, rhs1);
2834   if (TREE_CODE (rhs2) == INTEGER_CST)
2835     rhs2 = fold_convert (type2, rhs2);
2836
2837   gimple_assign_set_rhs1 (stmt, rhs1);
2838   gimple_assign_set_rhs2 (stmt, rhs2);
2839   gimple_assign_set_rhs_code (stmt, WIDEN_MULT_EXPR);
2840   update_stmt (stmt);
2841   widen_mul_stats.widen_mults_inserted++;
2842   return true;
2843 }
2844
2845 /* Process a single gimple statement STMT, which is found at the
2846    iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2847    rhs (given by CODE), and try to convert it into a
2848    WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
2849    is true iff we converted the statement.  */
2850
2851 static bool
2852 convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple *stmt,
2853                             enum tree_code code)
2854 {
2855   gimple *rhs1_stmt = NULL, *rhs2_stmt = NULL;
2856   gimple *conv1_stmt = NULL, *conv2_stmt = NULL, *conv_stmt;
2857   tree type, type1, type2, optype;
2858   tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
2859   enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
2860   optab this_optab;
2861   enum tree_code wmult_code;
2862   enum insn_code handler;
2863   scalar_mode to_mode, from_mode, actual_mode;
2864   location_t loc = gimple_location (stmt);
2865   int actual_precision;
2866   bool from_unsigned1, from_unsigned2;
2867
2868   lhs = gimple_assign_lhs (stmt);
2869   type = TREE_TYPE (lhs);
2870   if (TREE_CODE (type) != INTEGER_TYPE
2871       && TREE_CODE (type) != FIXED_POINT_TYPE)
2872     return false;
2873
2874   if (code == MINUS_EXPR)
2875     wmult_code = WIDEN_MULT_MINUS_EXPR;
2876   else
2877     wmult_code = WIDEN_MULT_PLUS_EXPR;
2878
2879   rhs1 = gimple_assign_rhs1 (stmt);
2880   rhs2 = gimple_assign_rhs2 (stmt);
2881
2882   if (TREE_CODE (rhs1) == SSA_NAME)
2883     {
2884       rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2885       if (is_gimple_assign (rhs1_stmt))
2886         rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2887     }
2888
2889   if (TREE_CODE (rhs2) == SSA_NAME)
2890     {
2891       rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2892       if (is_gimple_assign (rhs2_stmt))
2893         rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2894     }
2895
2896   /* Allow for one conversion statement between the multiply
2897      and addition/subtraction statement.  If there are more than
2898      one conversions then we assume they would invalidate this
2899      transformation.  If that's not the case then they should have
2900      been folded before now.  */
2901   if (CONVERT_EXPR_CODE_P (rhs1_code))
2902     {
2903       conv1_stmt = rhs1_stmt;
2904       rhs1 = gimple_assign_rhs1 (rhs1_stmt);
2905       if (TREE_CODE (rhs1) == SSA_NAME)
2906         {
2907           rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
2908           if (is_gimple_assign (rhs1_stmt))
2909             rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
2910         }
2911       else
2912         return false;
2913     }
2914   if (CONVERT_EXPR_CODE_P (rhs2_code))
2915     {
2916       conv2_stmt = rhs2_stmt;
2917       rhs2 = gimple_assign_rhs1 (rhs2_stmt);
2918       if (TREE_CODE (rhs2) == SSA_NAME)
2919         {
2920           rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
2921           if (is_gimple_assign (rhs2_stmt))
2922             rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
2923         }
2924       else
2925         return false;
2926     }
2927
2928   /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2929      is_widening_mult_p, but we still need the rhs returns.
2930
2931      It might also appear that it would be sufficient to use the existing
2932      operands of the widening multiply, but that would limit the choice of
2933      multiply-and-accumulate instructions.
2934
2935      If the widened-multiplication result has more than one uses, it is
2936      probably wiser not to do the conversion.  Also restrict this operation
2937      to single basic block to avoid moving the multiply to a different block
2938      with a higher execution frequency.  */
2939   if (code == PLUS_EXPR
2940       && (rhs1_code == MULT_EXPR || rhs1_code == WIDEN_MULT_EXPR))
2941     {
2942       if (!has_single_use (rhs1)
2943           || gimple_bb (rhs1_stmt) != gimple_bb (stmt)
2944           || !is_widening_mult_p (rhs1_stmt, &type1, &mult_rhs1,
2945                                   &type2, &mult_rhs2))
2946         return false;
2947       add_rhs = rhs2;
2948       conv_stmt = conv1_stmt;
2949     }
2950   else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
2951     {
2952       if (!has_single_use (rhs2)
2953           || gimple_bb (rhs2_stmt) != gimple_bb (stmt)
2954           || !is_widening_mult_p (rhs2_stmt, &type1, &mult_rhs1,
2955                                   &type2, &mult_rhs2))
2956         return false;
2957       add_rhs = rhs1;
2958       conv_stmt = conv2_stmt;
2959     }
2960   else
2961     return false;
2962
2963   to_mode = SCALAR_TYPE_MODE (type);
2964   from_mode = SCALAR_TYPE_MODE (type1);
2965   if (to_mode == from_mode)
2966     return false;
2967
2968   from_unsigned1 = TYPE_UNSIGNED (type1);
2969   from_unsigned2 = TYPE_UNSIGNED (type2);
2970   optype = type1;
2971
2972   /* There's no such thing as a mixed sign madd yet, so use a wider mode.  */
2973   if (from_unsigned1 != from_unsigned2)
2974     {
2975       if (!INTEGRAL_TYPE_P (type))
2976         return false;
2977       /* We can use a signed multiply with unsigned types as long as
2978          there is a wider mode to use, or it is the smaller of the two
2979          types that is unsigned.  Note that type1 >= type2, always.  */
2980       if ((from_unsigned1
2981            && TYPE_PRECISION (type1) == GET_MODE_PRECISION (from_mode))
2982           || (from_unsigned2
2983               && TYPE_PRECISION (type2) == GET_MODE_PRECISION (from_mode)))
2984         {
2985           if (!GET_MODE_WIDER_MODE (from_mode).exists (&from_mode)
2986               || GET_MODE_SIZE (from_mode) >= GET_MODE_SIZE (to_mode))
2987             return false;
2988         }
2989
2990       from_unsigned1 = from_unsigned2 = false;
2991       optype = build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode),
2992                                                false);
2993     }
2994
2995   /* If there was a conversion between the multiply and addition
2996      then we need to make sure it fits a multiply-and-accumulate.
2997      The should be a single mode change which does not change the
2998      value.  */
2999   if (conv_stmt)
3000     {
3001       /* We use the original, unmodified data types for this.  */
3002       tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
3003       tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
3004       int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
3005       bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
3006
3007       if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
3008         {
3009           /* Conversion is a truncate.  */
3010           if (TYPE_PRECISION (to_type) < data_size)
3011             return false;
3012         }
3013       else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
3014         {
3015           /* Conversion is an extend.  Check it's the right sort.  */
3016           if (TYPE_UNSIGNED (from_type) != is_unsigned
3017               && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
3018             return false;
3019         }
3020       /* else convert is a no-op for our purposes.  */
3021     }
3022
3023   /* Verify that the machine can perform a widening multiply
3024      accumulate in this mode/signedness combination, otherwise
3025      this transformation is likely to pessimize code.  */
3026   this_optab = optab_for_tree_code (wmult_code, optype, optab_default);
3027   handler = find_widening_optab_handler_and_mode (this_optab, to_mode,
3028                                                   from_mode, &actual_mode);
3029
3030   if (handler == CODE_FOR_nothing)
3031     return false;
3032
3033   /* Ensure that the inputs to the handler are in the correct precison
3034      for the opcode.  This will be the full mode size.  */
3035   actual_precision = GET_MODE_PRECISION (actual_mode);
3036   if (actual_precision != TYPE_PRECISION (type1)
3037       || from_unsigned1 != TYPE_UNSIGNED (type1))
3038     mult_rhs1 = build_and_insert_cast (gsi, loc,
3039                                        build_nonstandard_integer_type
3040                                          (actual_precision, from_unsigned1),
3041                                        mult_rhs1);
3042   if (actual_precision != TYPE_PRECISION (type2)
3043       || from_unsigned2 != TYPE_UNSIGNED (type2))
3044     mult_rhs2 = build_and_insert_cast (gsi, loc,
3045                                        build_nonstandard_integer_type
3046                                          (actual_precision, from_unsigned2),
3047                                        mult_rhs2);
3048
3049   if (!useless_type_conversion_p (type, TREE_TYPE (add_rhs)))
3050     add_rhs = build_and_insert_cast (gsi, loc, type, add_rhs);
3051
3052   /* Handle constants.  */
3053   if (TREE_CODE (mult_rhs1) == INTEGER_CST)
3054     mult_rhs1 = fold_convert (type1, mult_rhs1);
3055   if (TREE_CODE (mult_rhs2) == INTEGER_CST)
3056     mult_rhs2 = fold_convert (type2, mult_rhs2);
3057
3058   gimple_assign_set_rhs_with_ops (gsi, wmult_code, mult_rhs1, mult_rhs2,
3059                                   add_rhs);
3060   update_stmt (gsi_stmt (*gsi));
3061   widen_mul_stats.maccs_inserted++;
3062   return true;
3063 }
3064
3065 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3066    OP2 and which we know is used in statements that can be, together with the
3067    multiplication, converted to FMAs, perform the transformation.  */
3068
3069 static void
3070 convert_mult_to_fma_1 (tree mul_result, tree op1, tree op2)
3071 {
3072   tree type = TREE_TYPE (mul_result);
3073   gimple *use_stmt;
3074   imm_use_iterator imm_iter;
3075   gcall *fma_stmt;
3076
3077   FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, mul_result)
3078     {
3079       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
3080       tree addop, mulop1 = op1, result = mul_result;
3081       bool negate_p = false;
3082       gimple_seq seq = NULL;
3083
3084       if (is_gimple_debug (use_stmt))
3085         continue;
3086
3087       if (is_gimple_assign (use_stmt)
3088           && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3089         {
3090           result = gimple_assign_lhs (use_stmt);
3091           use_operand_p use_p;
3092           gimple *neguse_stmt;
3093           single_imm_use (gimple_assign_lhs (use_stmt), &use_p, &neguse_stmt);
3094           gsi_remove (&gsi, true);
3095           release_defs (use_stmt);
3096
3097           use_stmt = neguse_stmt;
3098           gsi = gsi_for_stmt (use_stmt);
3099           negate_p = true;
3100         }
3101
3102       tree cond, else_value, ops[3];
3103       tree_code code;
3104       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code,
3105                                               ops, &else_value))
3106         gcc_unreachable ();
3107       addop = ops[0] == result ? ops[1] : ops[0];
3108
3109       if (code == MINUS_EXPR)
3110         {
3111           if (ops[0] == result)
3112             /* a * b - c -> a * b + (-c)  */
3113             addop = gimple_build (&seq, NEGATE_EXPR, type, addop);
3114           else
3115             /* a - b * c -> (-b) * c + a */
3116             negate_p = !negate_p;
3117         }
3118
3119       if (negate_p)
3120         mulop1 = gimple_build (&seq, NEGATE_EXPR, type, mulop1);
3121
3122       if (seq)
3123         gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3124
3125       if (cond)
3126         fma_stmt = gimple_build_call_internal (IFN_COND_FMA, 5, cond, mulop1,
3127                                                op2, addop, else_value);
3128       else
3129         fma_stmt = gimple_build_call_internal (IFN_FMA, 3, mulop1, op2, addop);
3130       gimple_set_lhs (fma_stmt, gimple_get_lhs (use_stmt));
3131       gimple_call_set_nothrow (fma_stmt, !stmt_can_throw_internal (cfun,
3132                                                                    use_stmt));
3133       gsi_replace (&gsi, fma_stmt, true);
3134       /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3135          regardless of where the negation occurs.  */
3136       gimple *orig_stmt = gsi_stmt (gsi);
3137       if (fold_stmt (&gsi, follow_all_ssa_edges))
3138         {
3139           if (maybe_clean_or_replace_eh_stmt (orig_stmt, gsi_stmt (gsi)))
3140             gcc_unreachable ();
3141           update_stmt (gsi_stmt (gsi));
3142         }
3143
3144       if (dump_file && (dump_flags & TDF_DETAILS))
3145         {
3146           fprintf (dump_file, "Generated FMA ");
3147           print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3148           fprintf (dump_file, "\n");
3149         }
3150
3151       /* If the FMA result is negated in a single use, fold the negation
3152          too.  */
3153       orig_stmt = gsi_stmt (gsi);
3154       use_operand_p use_p;
3155       gimple *neg_stmt;
3156       if (is_gimple_call (orig_stmt)
3157           && gimple_call_internal_p (orig_stmt)
3158           && gimple_call_lhs (orig_stmt)
3159           && TREE_CODE (gimple_call_lhs (orig_stmt)) == SSA_NAME
3160           && single_imm_use (gimple_call_lhs (orig_stmt), &use_p, &neg_stmt)
3161           && is_gimple_assign (neg_stmt)
3162           && gimple_assign_rhs_code (neg_stmt) == NEGATE_EXPR
3163           && !stmt_could_throw_p (cfun, neg_stmt))
3164         {
3165           gsi = gsi_for_stmt (neg_stmt);
3166           if (fold_stmt (&gsi, follow_all_ssa_edges))
3167             {
3168               if (maybe_clean_or_replace_eh_stmt (neg_stmt, gsi_stmt (gsi)))
3169                 gcc_unreachable ();
3170               update_stmt (gsi_stmt (gsi));
3171               if (dump_file && (dump_flags & TDF_DETAILS))
3172                 {
3173                   fprintf (dump_file, "Folded FMA negation ");
3174                   print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_NONE);
3175                   fprintf (dump_file, "\n");
3176                 }
3177             }
3178         }
3179
3180       widen_mul_stats.fmas_inserted++;
3181     }
3182 }
3183
3184 /* Data necessary to perform the actual transformation from a multiplication
3185    and an addition to an FMA after decision is taken it should be done and to
3186    then delete the multiplication statement from the function IL.  */
3187
3188 struct fma_transformation_info
3189 {
3190   gimple *mul_stmt;
3191   tree mul_result;
3192   tree op1;
3193   tree op2;
3194 };
3195
3196 /* Structure containing the current state of FMA deferring, i.e. whether we are
3197    deferring, whether to continue deferring, and all data necessary to come
3198    back and perform all deferred transformations.  */
3199
3200 class fma_deferring_state
3201 {
3202 public:
3203   /* Class constructor.  Pass true as PERFORM_DEFERRING in order to actually
3204      do any deferring.  */
3205
3206   fma_deferring_state (bool perform_deferring)
3207     : m_candidates (), m_mul_result_set (), m_initial_phi (NULL),
3208       m_last_result (NULL_TREE), m_deferring_p (perform_deferring) {}
3209
3210   /* List of FMA candidates for which we the transformation has been determined
3211      possible but we at this point in BB analysis we do not consider them
3212      beneficial.  */
3213   auto_vec<fma_transformation_info, 8> m_candidates;
3214
3215   /* Set of results of multiplication that are part of an already deferred FMA
3216      candidates.  */
3217   hash_set<tree> m_mul_result_set;
3218
3219   /* The PHI that supposedly feeds back result of a FMA to another over loop
3220      boundary.  */
3221   gphi *m_initial_phi;
3222
3223   /* Result of the last produced FMA candidate or NULL if there has not been
3224      one.  */
3225   tree m_last_result;
3226
3227   /* If true, deferring might still be profitable.  If false, transform all
3228      candidates and no longer defer.  */
3229   bool m_deferring_p;
3230 };
3231
3232 /* Transform all deferred FMA candidates and mark STATE as no longer
3233    deferring.  */
3234
3235 static void
3236 cancel_fma_deferring (fma_deferring_state *state)
3237 {
3238   if (!state->m_deferring_p)
3239     return;
3240
3241   for (unsigned i = 0; i < state->m_candidates.length (); i++)
3242     {
3243       if (dump_file && (dump_flags & TDF_DETAILS))
3244         fprintf (dump_file, "Generating deferred FMA\n");
3245
3246       const fma_transformation_info &fti = state->m_candidates[i];
3247       convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);
3248
3249       gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
3250       gsi_remove (&gsi, true);
3251       release_defs (fti.mul_stmt);
3252     }
3253   state->m_deferring_p = false;
3254 }
3255
3256 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3257    Otherwise return NULL.  */
3258
3259 static gphi *
3260 result_of_phi (tree op)
3261 {
3262   if (TREE_CODE (op) != SSA_NAME)
3263     return NULL;
3264
3265   return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
3266 }
3267
3268 /* After processing statements of a BB and recording STATE, return true if the
3269    initial phi is fed by the last FMA candidate result ore one such result from
3270    previously processed BBs marked in LAST_RESULT_SET.  */
3271
3272 static bool
3273 last_fma_candidate_feeds_initial_phi (fma_deferring_state *state,
3274                                       hash_set<tree> *last_result_set)
3275 {
3276   ssa_op_iter iter;
3277   use_operand_p use;
3278   FOR_EACH_PHI_ARG (use, state->m_initial_phi, iter, SSA_OP_USE)
3279     {
3280       tree t = USE_FROM_PTR (use);
3281       if (t == state->m_last_result
3282           || last_result_set->contains (t))
3283         return true;
3284     }
3285
3286   return false;
3287 }
3288
3289 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3290    with uses in additions and subtractions to form fused multiply-add
3291    operations.  Returns true if successful and MUL_STMT should be removed.
3292    If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3293    on MUL_COND, otherwise it is unconditional.
3294
3295    If STATE indicates that we are deferring FMA transformation, that means
3296    that we do not produce FMAs for basic blocks which look like:
3297
3298     <bb 6>
3299     # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3300     _65 = _14 * _16;
3301     accumulator_66 = _65 + accumulator_111;
3302
3303   or its unrolled version, i.e. with several FMA candidates that feed result
3304   of one into the addend of another.  Instead, we add them to a list in STATE
3305   and if we later discover an FMA candidate that is not part of such a chain,
3306   we go back and perform all deferred past candidates.  */
3307
3308 static bool
3309 convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
3310                      fma_deferring_state *state, tree mul_cond = NULL_TREE)
3311 {
3312   tree mul_result = gimple_get_lhs (mul_stmt);
3313   /* If there isn't a LHS then this can't be an FMA.  There can be no LHS
3314      if the statement was left just for the side-effects.  */
3315   if (!mul_result)
3316     return false;
3317   tree type = TREE_TYPE (mul_result);
3318   gimple *use_stmt, *neguse_stmt;
3319   use_operand_p use_p;
3320   imm_use_iterator imm_iter;
3321
3322   if (FLOAT_TYPE_P (type)
3323       && flag_fp_contract_mode != FP_CONTRACT_FAST)
3324     return false;
3325
3326   /* We don't want to do bitfield reduction ops.  */
3327   if (INTEGRAL_TYPE_P (type)
3328       && (!type_has_mode_precision_p (type) || TYPE_OVERFLOW_TRAPS (type)))
3329     return false;
3330
3331   /* If the target doesn't support it, don't generate it.  We assume that
3332      if fma isn't available then fms, fnma or fnms are not either.  */
3333   optimization_type opt_type = bb_optimization_type (gimple_bb (mul_stmt));
3334   if (!direct_internal_fn_supported_p (IFN_FMA, type, opt_type))
3335     return false;
3336
3337   /* If the multiplication has zero uses, it is kept around probably because
3338      of -fnon-call-exceptions.  Don't optimize it away in that case,
3339      it is DCE job.  */
3340   if (has_zero_uses (mul_result))
3341     return false;
3342
3343   bool check_defer
3344     = (state->m_deferring_p
3345        && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type)),
3346                     param_avoid_fma_max_bits));
3347   bool defer = check_defer;
3348   bool seen_negate_p = false;
3349
3350   /* There is no numerical difference between fused and unfused integer FMAs,
3351      and the assumption below that FMA is as cheap as addition is unlikely
3352      to be true, especially if the multiplication occurs multiple times on
3353      the same chain.  E.g., for something like:
3354
3355          (((a * b) + c) >> 1) + (a * b)
3356
3357      we do not want to duplicate the a * b into two additions, not least
3358      because the result is not a natural FMA chain.  */
3359   if (ANY_INTEGRAL_TYPE_P (type)
3360       && !has_single_use (mul_result))
3361     return false;
3362
3363   /* Make sure that the multiplication statement becomes dead after
3364      the transformation, thus that all uses are transformed to FMAs.
3365      This means we assume that an FMA operation has the same cost
3366      as an addition.  */
3367   FOR_EACH_IMM_USE_FAST (use_p, imm_iter, mul_result)
3368     {
3369       tree result = mul_result;
3370       bool negate_p = false;
3371
3372       use_stmt = USE_STMT (use_p);
3373
3374       if (is_gimple_debug (use_stmt))
3375         continue;
3376
3377       /* For now restrict this operations to single basic blocks.  In theory
3378          we would want to support sinking the multiplication in
3379          m = a*b;
3380          if ()
3381            ma = m + c;
3382          else
3383            d = m;
3384          to form a fma in the then block and sink the multiplication to the
3385          else block.  */
3386       if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3387         return false;
3388
3389       /* A negate on the multiplication leads to FNMA.  */
3390       if (is_gimple_assign (use_stmt)
3391           && gimple_assign_rhs_code (use_stmt) == NEGATE_EXPR)
3392         {
3393           ssa_op_iter iter;
3394           use_operand_p usep;
3395
3396           /* If (due to earlier missed optimizations) we have two
3397              negates of the same value, treat them as equivalent
3398              to a single negate with multiple uses.  */
3399           if (seen_negate_p)
3400             return false;
3401
3402           result = gimple_assign_lhs (use_stmt);
3403
3404           /* Make sure the negate statement becomes dead with this
3405              single transformation.  */
3406           if (!single_imm_use (gimple_assign_lhs (use_stmt),
3407                                &use_p, &neguse_stmt))
3408             return false;
3409
3410           /* Make sure the multiplication isn't also used on that stmt.  */
3411           FOR_EACH_PHI_OR_STMT_USE (usep, neguse_stmt, iter, SSA_OP_USE)
3412             if (USE_FROM_PTR (usep) == mul_result)
3413               return false;
3414
3415           /* Re-validate.  */
3416           use_stmt = neguse_stmt;
3417           if (gimple_bb (use_stmt) != gimple_bb (mul_stmt))
3418             return false;
3419
3420           negate_p = seen_negate_p = true;
3421         }
3422
3423       tree cond, else_value, ops[3];
3424       tree_code code;
3425       if (!can_interpret_as_conditional_op_p (use_stmt, &cond, &code, ops,
3426                                               &else_value))
3427         return false;
3428
3429       switch (code)
3430         {
3431         case MINUS_EXPR:
3432           if (ops[1] == result)
3433             negate_p = !negate_p;
3434           break;
3435         case PLUS_EXPR:
3436           break;
3437         default:
3438           /* FMA can only be formed from PLUS and MINUS.  */
3439           return false;
3440         }
3441
3442       if (mul_cond && cond != mul_cond)
3443         return false;
3444
3445       if (cond)
3446         {
3447           if (cond == result || else_value == result)
3448             return false;
3449           if (!direct_internal_fn_supported_p (IFN_COND_FMA, type, opt_type))
3450             return false;
3451         }
3452
3453       /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3454          we'll visit later, we might be able to get a more profitable
3455          match with fnma.
3456          OTOH, if we don't, a negate / fma pair has likely lower latency
3457          that a mult / subtract pair.  */
3458       if (code == MINUS_EXPR
3459           && !negate_p
3460           && ops[0] == result
3461           && !direct_internal_fn_supported_p (IFN_FMS, type, opt_type)
3462           && direct_internal_fn_supported_p (IFN_FNMA, type, opt_type)
3463           && TREE_CODE (ops[1]) == SSA_NAME
3464           && has_single_use (ops[1]))
3465         {
3466           gimple *stmt2 = SSA_NAME_DEF_STMT (ops[1]);
3467           if (is_gimple_assign (stmt2)
3468               && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
3469             return false;
3470         }
3471
3472       /* We can't handle a * b + a * b.  */
3473       if (ops[0] == ops[1])
3474         return false;
3475       /* If deferring, make sure we are not looking at an instruction that
3476          wouldn't have existed if we were not.  */
3477       if (state->m_deferring_p
3478           && (state->m_mul_result_set.contains (ops[0])
3479               || state->m_mul_result_set.contains (ops[1])))
3480         return false;
3481
3482       if (check_defer)
3483         {
3484           tree use_lhs = gimple_get_lhs (use_stmt);
3485           if (state->m_last_result)
3486             {
3487               if (ops[1] == state->m_last_result
3488                   || ops[0] == state->m_last_result)
3489                 defer = true;
3490               else
3491                 defer = false;
3492             }
3493           else
3494             {
3495               gcc_checking_assert (!state->m_initial_phi);
3496               gphi *phi;
3497               if (ops[0] == result)
3498                 phi = result_of_phi (ops[1]);
3499               else
3500                 {
3501                   gcc_assert (ops[1] == result);
3502                   phi = result_of_phi (ops[0]);
3503                 }
3504
3505               if (phi)
3506                 {
3507                   state->m_initial_phi = phi;
3508                   defer = true;
3509                 }
3510               else
3511                 defer = false;
3512             }
3513
3514           state->m_last_result = use_lhs;
3515           check_defer = false;
3516         }
3517       else
3518         defer = false;
3519
3520       /* While it is possible to validate whether or not the exact form that
3521          we've recognized is available in the backend, the assumption is that
3522          if the deferring logic above did not trigger, the transformation is
3523          never a loss.  For instance, suppose the target only has the plain FMA
3524          pattern available.  Consider a*b-c -> fma(a,b,-c): we've exchanged
3525          MUL+SUB for FMA+NEG, which is still two operations.  Consider
3526          -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3527          form the two NEGs are independent and could be run in parallel.  */
3528     }
3529
3530   if (defer)
3531     {
3532       fma_transformation_info fti;
3533       fti.mul_stmt = mul_stmt;
3534       fti.mul_result = mul_result;
3535       fti.op1 = op1;
3536       fti.op2 = op2;
3537       state->m_candidates.safe_push (fti);
3538       state->m_mul_result_set.add (mul_result);
3539
3540       if (dump_file && (dump_flags & TDF_DETAILS))
3541         {
3542           fprintf (dump_file, "Deferred generating FMA for multiplication ");
3543           print_gimple_stmt (dump_file, mul_stmt, 0, TDF_NONE);
3544           fprintf (dump_file, "\n");
3545         }
3546
3547       return false;
3548     }
3549   else
3550     {
3551       if (state->m_deferring_p)
3552         cancel_fma_deferring (state);
3553       convert_mult_to_fma_1 (mul_result, op1, op2);
3554       return true;
3555     }
3556 }
3557
3558
3559 /* Helper function of match_arith_overflow.  For MUL_OVERFLOW, if we have
3560    a check for non-zero like:
3561    _1 = x_4(D) * y_5(D);
3562    *res_7(D) = _1;
3563    if (x_4(D) != 0)
3564      goto <bb 3>; [50.00%]
3565    else
3566      goto <bb 4>; [50.00%]
3567
3568    <bb 3> [local count: 536870913]:
3569    _2 = _1 / x_4(D);
3570    _9 = _2 != y_5(D);
3571    _10 = (int) _9;
3572
3573    <bb 4> [local count: 1073741824]:
3574    # iftmp.0_3 = PHI <_10(3), 0(2)>
3575    then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3576    optimize the x_4(D) != 0 condition to 1.  */
3577
3578 static void
3579 maybe_optimize_guarding_check (vec<gimple *> &mul_stmts, gimple *cond_stmt,
3580                                gimple *div_stmt, bool *cfg_changed)
3581 {
3582   basic_block bb = gimple_bb (cond_stmt);
3583   if (gimple_bb (div_stmt) != bb || !single_pred_p (bb))
3584     return;
3585   edge pred_edge = single_pred_edge (bb);
3586   basic_block pred_bb = pred_edge->src;
3587   if (EDGE_COUNT (pred_bb->succs) != 2)
3588     return;
3589   edge other_edge = EDGE_SUCC (pred_bb, EDGE_SUCC (pred_bb, 0) == pred_edge);
3590   edge other_succ_edge = NULL;
3591   if (gimple_code (cond_stmt) == GIMPLE_COND)
3592     {
3593       if (EDGE_COUNT (bb->succs) != 2)
3594         return;
3595       other_succ_edge = EDGE_SUCC (bb, 0);
3596       if (gimple_cond_code (cond_stmt) == NE_EXPR)
3597         {
3598           if (other_succ_edge->flags & EDGE_TRUE_VALUE)
3599             other_succ_edge = EDGE_SUCC (bb, 1);
3600         }
3601       else if (other_succ_edge->flags & EDGE_FALSE_VALUE)
3602         other_succ_edge = EDGE_SUCC (bb, 0);
3603       if (other_edge->dest != other_succ_edge->dest)
3604         return;
3605     }
3606   else if (!single_succ_p (bb) || other_edge->dest != single_succ (bb))
3607     return;
3608   gcond *zero_cond = safe_dyn_cast <gcond *> (*gsi_last_bb (pred_bb));
3609   if (zero_cond == NULL
3610       || (gimple_cond_code (zero_cond)
3611           != ((pred_edge->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR))
3612       || !integer_zerop (gimple_cond_rhs (zero_cond)))
3613     return;
3614   tree zero_cond_lhs = gimple_cond_lhs (zero_cond);
3615   if (TREE_CODE (zero_cond_lhs) != SSA_NAME)
3616     return;
3617   if (gimple_assign_rhs2 (div_stmt) != zero_cond_lhs)
3618     {
3619       /* Allow the divisor to be result of a same precision cast
3620          from zero_cond_lhs.  */
3621       tree rhs2 = gimple_assign_rhs2 (div_stmt);
3622       if (TREE_CODE (rhs2) != SSA_NAME)
3623         return;
3624       gimple *g = SSA_NAME_DEF_STMT (rhs2);
3625       if (!gimple_assign_cast_p (g)
3626           || gimple_assign_rhs1 (g) != gimple_cond_lhs (zero_cond)
3627           || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs))
3628           || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs))
3629               != TYPE_PRECISION (TREE_TYPE (rhs2))))
3630         return;
3631     }
3632   gimple_stmt_iterator gsi = gsi_after_labels (bb);
3633   mul_stmts.quick_push (div_stmt);
3634   if (is_gimple_debug (gsi_stmt (gsi)))
3635     gsi_next_nondebug (&gsi);
3636   unsigned cast_count = 0;
3637   while (gsi_stmt (gsi) != cond_stmt)
3638     {
3639       /* If original mul_stmt has a single use, allow it in the same bb,
3640          we are looking then just at __builtin_mul_overflow_p.
3641          Though, in that case the original mul_stmt will be replaced
3642          by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3643       gimple *mul_stmt;
3644       unsigned int i;
3645       bool ok = false;
3646       FOR_EACH_VEC_ELT (mul_stmts, i, mul_stmt)
3647         {
3648           if (gsi_stmt (gsi) == mul_stmt)
3649             {
3650               ok = true;
3651               break;
3652             }
3653         }
3654       if (!ok && gimple_assign_cast_p (gsi_stmt (gsi)) && ++cast_count < 4)
3655         ok = true;
3656       if (!ok)
3657         return;
3658       gsi_next_nondebug (&gsi);
3659     }
3660   if (gimple_code (cond_stmt) == GIMPLE_COND)
3661     {
3662       basic_block succ_bb = other_edge->dest;
3663       for (gphi_iterator gpi = gsi_start_phis (succ_bb); !gsi_end_p (gpi);
3664            gsi_next (&gpi))
3665         {
3666           gphi *phi = gpi.phi ();
3667           tree v1 = gimple_phi_arg_def (phi, other_edge->dest_idx);
3668           tree v2 = gimple_phi_arg_def (phi, other_succ_edge->dest_idx);
3669           if (!operand_equal_p (v1, v2, 0))
3670             return;
3671         }
3672     }
3673   else
3674     {
3675       tree lhs = gimple_assign_lhs (cond_stmt);
3676       if (!lhs || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
3677         return;
3678       gsi_next_nondebug (&gsi);
3679       if (!gsi_end_p (gsi))
3680         {
3681           if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3682             return;
3683           gimple *cast_stmt = gsi_stmt (gsi);
3684           if (!gimple_assign_cast_p (cast_stmt))
3685             return;
3686           tree new_lhs = gimple_assign_lhs (cast_stmt);
3687           gsi_next_nondebug (&gsi);
3688           if (!gsi_end_p (gsi)
3689               || !new_lhs
3690               || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs))
3691               || TYPE_PRECISION (TREE_TYPE (new_lhs)) <= 1)
3692             return;
3693           lhs = new_lhs;
3694         }
3695       edge succ_edge = single_succ_edge (bb);
3696       basic_block succ_bb = succ_edge->dest;
3697       gsi = gsi_start_phis (succ_bb);
3698       if (gsi_end_p (gsi))
3699         return;
3700       gphi *phi = as_a <gphi *> (gsi_stmt (gsi));
3701       gsi_next (&gsi);
3702       if (!gsi_end_p (gsi))
3703         return;
3704       if (gimple_phi_arg_def (phi, succ_edge->dest_idx) != lhs)
3705         return;
3706       tree other_val = gimple_phi_arg_def (phi, other_edge->dest_idx);
3707       if (gimple_assign_rhs_code (cond_stmt) == COND_EXPR)
3708         {
3709           tree cond = gimple_assign_rhs1 (cond_stmt);
3710           if (TREE_CODE (cond) == NE_EXPR)
3711             {
3712               if (!operand_equal_p (other_val,
3713                                     gimple_assign_rhs3 (cond_stmt), 0))
3714                 return;
3715             }
3716           else if (!operand_equal_p (other_val,
3717                                      gimple_assign_rhs2 (cond_stmt), 0))
3718             return;
3719         }
3720       else if (gimple_assign_rhs_code (cond_stmt) == NE_EXPR)
3721         {
3722           if (!integer_zerop (other_val))
3723             return;
3724         }
3725       else if (!integer_onep (other_val))
3726         return;
3727     }
3728   if (pred_edge->flags & EDGE_TRUE_VALUE)
3729     gimple_cond_make_true (zero_cond);
3730   else
3731     gimple_cond_make_false (zero_cond);
3732   update_stmt (zero_cond);
3733   *cfg_changed = true;
3734 }
3735
3736 /* Helper function for arith_overflow_check_p.  Return true
3737    if VAL1 is equal to VAL2 cast to corresponding integral type
3738    with other signedness or vice versa.  */
3739
3740 static bool
3741 arith_cast_equal_p (tree val1, tree val2)
3742 {
3743   if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
3744     return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
3745   else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
3746     return false;
3747   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
3748       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
3749     return true;
3750   if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
3751       && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
3752     return true;
3753   return false;
3754 }
3755
3756 /* Helper function of match_arith_overflow.  Return 1
3757    if USE_STMT is unsigned overflow check ovf != 0 for
3758    STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3759    and 0 otherwise.  */
3760
3761 static int
3762 arith_overflow_check_p (gimple *stmt, gimple *cast_stmt, gimple *&use_stmt,
3763                         tree maxval, tree *other)
3764 {
3765   enum tree_code ccode = ERROR_MARK;
3766   tree crhs1 = NULL_TREE, crhs2 = NULL_TREE;
3767   enum tree_code code = gimple_assign_rhs_code (stmt);
3768   tree lhs = gimple_assign_lhs (cast_stmt ? cast_stmt : stmt);
3769   tree rhs1 = gimple_assign_rhs1 (stmt);
3770   tree rhs2 = gimple_assign_rhs2 (stmt);
3771   tree multop = NULL_TREE, divlhs = NULL_TREE;
3772   gimple *cur_use_stmt = use_stmt;
3773
3774   if (code == MULT_EXPR)
3775     {
3776       if (!is_gimple_assign (use_stmt))
3777         return 0;
3778       if (gimple_assign_rhs_code (use_stmt) != TRUNC_DIV_EXPR)
3779         return 0;
3780       if (gimple_assign_rhs1 (use_stmt) != lhs)
3781         return 0;
3782       if (cast_stmt)
3783         {
3784           if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs1))
3785             multop = rhs2;
3786           else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt), rhs2))
3787             multop = rhs1;
3788           else
3789             return 0;
3790         }
3791       else if (gimple_assign_rhs2 (use_stmt) == rhs1)
3792         multop = rhs2;
3793       else if (operand_equal_p (gimple_assign_rhs2 (use_stmt), rhs2, 0))
3794         multop = rhs1;
3795       else
3796         return 0;
3797       if (stmt_ends_bb_p (use_stmt))
3798         return 0;
3799       divlhs = gimple_assign_lhs (use_stmt);
3800       if (!divlhs)
3801         return 0;
3802       use_operand_p use;
3803       if (!single_imm_use (divlhs, &use, &cur_use_stmt))
3804         return 0;
3805     }
3806   if (gimple_code (cur_use_stmt) == GIMPLE_COND)
3807     {
3808       ccode = gimple_cond_code (cur_use_stmt);
3809       crhs1 = gimple_cond_lhs (cur_use_stmt);
3810       crhs2 = gimple_cond_rhs (cur_use_stmt);
3811     }
3812   else if (is_gimple_assign (cur_use_stmt))
3813     {
3814       if (gimple_assign_rhs_class (cur_use_stmt) == GIMPLE_BINARY_RHS)
3815         {
3816           ccode = gimple_assign_rhs_code (cur_use_stmt);
3817           crhs1 = gimple_assign_rhs1 (cur_use_stmt);
3818           crhs2 = gimple_assign_rhs2 (cur_use_stmt);
3819         }
3820       else if (gimple_assign_rhs_code (cur_use_stmt) == COND_EXPR)
3821         {
3822           tree cond = gimple_assign_rhs1 (cur_use_stmt);
3823           if (COMPARISON_CLASS_P (cond))
3824             {
3825               ccode = TREE_CODE (cond);
3826               crhs1 = TREE_OPERAND (cond, 0);
3827               crhs2 = TREE_OPERAND (cond, 1);
3828             }
3829           else
3830             return 0;
3831         }
3832       else
3833         return 0;
3834     }
3835   else
3836     return 0;
3837
3838   if (TREE_CODE_CLASS (ccode) != tcc_comparison)
3839     return 0;
3840
3841   switch (ccode)
3842     {
3843     case GT_EXPR:
3844     case LE_EXPR:
3845       if (maxval)
3846         {
3847           /* r = a + b; r > maxval or r <= maxval  */
3848           if (crhs1 == lhs
3849               && TREE_CODE (crhs2) == INTEGER_CST
3850               && tree_int_cst_equal (crhs2, maxval))
3851             return ccode == GT_EXPR ? 1 : -1;
3852           break;
3853         }
3854       /* r = a - b; r > a or r <= a
3855          r = a + b; a > r or a <= r or b > r or b <= r.  */
3856       if ((code == MINUS_EXPR && crhs1 == lhs && crhs2 == rhs1)
3857           || (code == PLUS_EXPR && (crhs1 == rhs1 || crhs1 == rhs2)
3858               && crhs2 == lhs))
3859         return ccode == GT_EXPR ? 1 : -1;
3860       /* r = ~a; b > r or b <= r.  */
3861       if (code == BIT_NOT_EXPR && crhs2 == lhs)
3862         {
3863           if (other)
3864             *other = crhs1;
3865           return ccode == GT_EXPR ? 1 : -1;
3866         }
3867       break;
3868     case LT_EXPR:
3869     case GE_EXPR:
3870       if (maxval)
3871         break;
3872       /* r = a - b; a < r or a >= r
3873          r = a + b; r < a or r >= a or r < b or r >= b.  */
3874       if ((code == MINUS_EXPR && crhs1 == rhs1 && crhs2 == lhs)
3875           || (code == PLUS_EXPR && crhs1 == lhs
3876               && (crhs2 == rhs1 || crhs2 == rhs2)))
3877         return ccode == LT_EXPR ? 1 : -1;
3878       /* r = ~a; r < b or r >= b.  */
3879       if (code == BIT_NOT_EXPR && crhs1 == lhs)
3880         {
3881           if (other)
3882             *other = crhs2;
3883           return ccode == LT_EXPR ? 1 : -1;
3884         }
3885       break;
3886     case EQ_EXPR:
3887     case NE_EXPR:
3888       /* r = a * b; _1 = r / a; _1 == b
3889          r = a * b; _1 = r / b; _1 == a
3890          r = a * b; _1 = r / a; _1 != b
3891          r = a * b; _1 = r / b; _1 != a.  */
3892       if (code == MULT_EXPR)
3893         {
3894           if (cast_stmt)
3895             {
3896               if ((crhs1 == divlhs && arith_cast_equal_p (crhs2, multop))
3897                   || (crhs2 == divlhs && arith_cast_equal_p (crhs1, multop)))
3898                 {
3899                   use_stmt = cur_use_stmt;
3900                   return ccode == NE_EXPR ? 1 : -1;
3901                 }
3902             }
3903           else if ((crhs1 == divlhs && operand_equal_p (crhs2, multop, 0))
3904                    || (crhs2 == divlhs && crhs1 == multop))
3905             {
3906               use_stmt = cur_use_stmt;
3907               return ccode == NE_EXPR ? 1 : -1;
3908             }
3909         }
3910       break;
3911     default:
3912       break;
3913     }
3914   return 0;
3915 }
3916
3917 /* Recognize for unsigned x
3918    x = y - z;
3919    if (x > y)
3920    where there are other uses of x and replace it with
3921    _7 = .SUB_OVERFLOW (y, z);
3922    x = REALPART_EXPR <_7>;
3923    _8 = IMAGPART_EXPR <_7>;
3924    if (_8)
3925    and similarly for addition.
3926
3927    Also recognize:
3928    yc = (type) y;
3929    zc = (type) z;
3930    x = yc + zc;
3931    if (x > max)
3932    where y and z have unsigned types with maximum max
3933    and there are other uses of x and all of those cast x
3934    back to that unsigned type and again replace it with
3935    _7 = .ADD_OVERFLOW (y, z);
3936    _9 = REALPART_EXPR <_7>;
3937    _8 = IMAGPART_EXPR <_7>;
3938    if (_8)
3939    and replace (utype) x with _9.
3940
3941    Also recognize:
3942    x = ~z;
3943    if (y > x)
3944    and replace it with
3945    _7 = .ADD_OVERFLOW (y, z);
3946    _8 = IMAGPART_EXPR <_7>;
3947    if (_8)
3948
3949    And also recognize:
3950    z = x * y;
3951    if (x != 0)
3952      goto <bb 3>; [50.00%]
3953    else
3954      goto <bb 4>; [50.00%]
3955
3956    <bb 3> [local count: 536870913]:
3957    _2 = z / x;
3958    _9 = _2 != y;
3959    _10 = (int) _9;
3960
3961    <bb 4> [local count: 1073741824]:
3962    # iftmp.0_3 = PHI <_10(3), 0(2)>
3963    and replace it with
3964    _7 = .MUL_OVERFLOW (x, y);
3965    z = IMAGPART_EXPR <_7>;
3966    _8 = IMAGPART_EXPR <_7>;
3967    _9 = _8 != 0;
3968    iftmp.0_3 = (int) _9;  */
3969
3970 static bool
3971 match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
3972                       enum tree_code code, bool *cfg_changed)
3973 {
3974   tree lhs = gimple_assign_lhs (stmt);
3975   tree type = TREE_TYPE (lhs);
3976   use_operand_p use_p;
3977   imm_use_iterator iter;
3978   bool use_seen = false;
3979   bool ovf_use_seen = false;
3980   gimple *use_stmt;
3981   gimple *add_stmt = NULL;
3982   bool add_first = false;
3983   gimple *cond_stmt = NULL;
3984   gimple *cast_stmt = NULL;
3985   tree cast_lhs = NULL_TREE;
3986
3987   gcc_checking_assert (code == PLUS_EXPR
3988                        || code == MINUS_EXPR
3989                        || code == MULT_EXPR
3990                        || code == BIT_NOT_EXPR);
3991   if (!INTEGRAL_TYPE_P (type)
3992       || !TYPE_UNSIGNED (type)
3993       || has_zero_uses (lhs)
3994       || (code != PLUS_EXPR
3995           && code != MULT_EXPR
3996           && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
3997                             TYPE_MODE (type)) == CODE_FOR_nothing))
3998     return false;
3999
4000   tree rhs1 = gimple_assign_rhs1 (stmt);
4001   tree rhs2 = gimple_assign_rhs2 (stmt);
4002   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4003     {
4004       use_stmt = USE_STMT (use_p);
4005       if (is_gimple_debug (use_stmt))
4006         continue;
4007
4008       tree other = NULL_TREE;
4009       if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
4010         {
4011           if (code == BIT_NOT_EXPR)
4012             {
4013               gcc_assert (other);
4014               if (TREE_CODE (other) != SSA_NAME)
4015                 return false;
4016               if (rhs2 == NULL)
4017                 rhs2 = other;
4018               else
4019                 return false;
4020               cond_stmt = use_stmt;
4021             }
4022           ovf_use_seen = true;
4023         }
4024       else
4025         {
4026           use_seen = true;
4027           if (code == MULT_EXPR
4028               && cast_stmt == NULL
4029               && gimple_assign_cast_p (use_stmt))
4030             {
4031               cast_lhs = gimple_assign_lhs (use_stmt);
4032               if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
4033                   && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
4034                   && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
4035                       == TYPE_PRECISION (TREE_TYPE (lhs))))
4036                 cast_stmt = use_stmt;
4037               else
4038                 cast_lhs = NULL_TREE;
4039             }
4040         }
4041       if (ovf_use_seen && use_seen)
4042         break;
4043     }
4044
4045   if (!ovf_use_seen
4046       && code == MULT_EXPR
4047       && cast_stmt)
4048     {
4049       if (TREE_CODE (rhs1) != SSA_NAME
4050           || (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
4051         return false;
4052       FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
4053         {
4054           use_stmt = USE_STMT (use_p);
4055           if (is_gimple_debug (use_stmt))
4056             continue;
4057
4058           if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4059                                       NULL_TREE, NULL))
4060             ovf_use_seen = true;
4061         }
4062     }
4063   else
4064     {
4065       cast_stmt = NULL;
4066       cast_lhs = NULL_TREE;
4067     }
4068
4069   tree maxval = NULL_TREE;
4070   if (!ovf_use_seen
4071       || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
4072       || (code == PLUS_EXPR
4073           && optab_handler (uaddv4_optab,
4074                             TYPE_MODE (type)) == CODE_FOR_nothing)
4075       || (code == MULT_EXPR
4076           && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
4077                             TYPE_MODE (type)) == CODE_FOR_nothing))
4078     {
4079       if (code != PLUS_EXPR)
4080         return false;
4081       if (TREE_CODE (rhs1) != SSA_NAME
4082           || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
4083         return false;
4084       rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
4085       tree type1 = TREE_TYPE (rhs1);
4086       if (!INTEGRAL_TYPE_P (type1)
4087           || !TYPE_UNSIGNED (type1)
4088           || TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
4089           || (TYPE_PRECISION (type1)
4090               != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
4091         return false;
4092       if (TREE_CODE (rhs2) == INTEGER_CST)
4093         {
4094           if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
4095                                     TYPE_PRECISION (type1),
4096                                     UNSIGNED), 0))
4097             return false;
4098           rhs2 = fold_convert (type1, rhs2);
4099         }
4100       else
4101         {
4102           if (TREE_CODE (rhs2) != SSA_NAME
4103               || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
4104             return false;
4105           rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
4106           tree type2 = TREE_TYPE (rhs2);
4107           if (!INTEGRAL_TYPE_P (type2)
4108               || !TYPE_UNSIGNED (type2)
4109               || TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
4110               || (TYPE_PRECISION (type2)
4111                   != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
4112             return false;
4113         }
4114       if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
4115         type = type1;
4116       else
4117         type = TREE_TYPE (rhs2);
4118
4119       if (TREE_CODE (type) != INTEGER_TYPE
4120           || optab_handler (uaddv4_optab,
4121                             TYPE_MODE (type)) == CODE_FOR_nothing)
4122         return false;
4123
4124       maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
4125                                                       UNSIGNED));
4126       ovf_use_seen = false;
4127       use_seen = false;
4128       basic_block use_bb = NULL;
4129       FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
4130         {
4131           use_stmt = USE_STMT (use_p);
4132           if (is_gimple_debug (use_stmt))
4133             continue;
4134
4135           if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
4136             {
4137               ovf_use_seen = true;
4138               use_bb = gimple_bb (use_stmt);
4139             }
4140           else
4141             {
4142               if (!gimple_assign_cast_p (use_stmt)
4143                   || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
4144                 return false;
4145               tree use_lhs = gimple_assign_lhs (use_stmt);
4146               if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
4147                   || (TYPE_PRECISION (TREE_TYPE (use_lhs))
4148                       > TYPE_PRECISION (type)))
4149                 return false;
4150               use_seen = true;
4151             }
4152         }
4153       if (!ovf_use_seen)
4154         return false;
4155       if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
4156         {
4157           if (!use_seen)
4158             return false;
4159           tree new_rhs1 = make_ssa_name (type);
4160           gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
4161           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4162           rhs1 = new_rhs1;
4163         }
4164       else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
4165         {
4166           if (!use_seen)
4167             return false;
4168           tree new_rhs2 = make_ssa_name (type);
4169           gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
4170           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4171           rhs2 = new_rhs2;
4172         }
4173       else if (!use_seen)
4174         {
4175           /* If there are no uses of the wider addition, check if
4176              forwprop has not created a narrower addition.
4177              Require it to be in the same bb as the overflow check.  */
4178           FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
4179             {
4180               use_stmt = USE_STMT (use_p);
4181               if (is_gimple_debug (use_stmt))
4182                 continue;
4183
4184               if (use_stmt == stmt)
4185                 continue;
4186
4187               if (!is_gimple_assign (use_stmt)
4188                   || gimple_bb (use_stmt) != use_bb
4189                   || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
4190                 continue;
4191
4192               if (gimple_assign_rhs1 (use_stmt) == rhs1)
4193                 {
4194                   if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
4195                                         rhs2, 0))
4196                     continue;
4197                 }
4198               else if (gimple_assign_rhs2 (use_stmt) == rhs1)
4199                 {
4200                   if (gimple_assign_rhs1 (use_stmt) != rhs2)
4201                     continue;
4202                 }
4203               else
4204                 continue;
4205
4206               add_stmt = use_stmt;
4207               break;
4208             }
4209           if (add_stmt == NULL)
4210             return false;
4211
4212           /* If stmt and add_stmt are in the same bb, we need to find out
4213              which one is earlier.  If they are in different bbs, we've
4214              checked add_stmt is in the same bb as one of the uses of the
4215              stmt lhs, so stmt needs to dominate add_stmt too.  */
4216           if (gimple_bb (stmt) == gimple_bb (add_stmt))
4217             {
4218               gimple_stmt_iterator gsif = *gsi;
4219               gimple_stmt_iterator gsib = *gsi;
4220               int i;
4221               /* Search both forward and backward from stmt and have a small
4222                  upper bound.  */
4223               for (i = 0; i < 128; i++)
4224                 {
4225                   if (!gsi_end_p (gsib))
4226                     {
4227                       gsi_prev_nondebug (&gsib);
4228                       if (gsi_stmt (gsib) == add_stmt)
4229                         {
4230                           add_first = true;
4231                           break;
4232                         }
4233                     }
4234                   else if (gsi_end_p (gsif))
4235                     break;
4236                   if (!gsi_end_p (gsif))
4237                     {
4238                       gsi_next_nondebug (&gsif);
4239                       if (gsi_stmt (gsif) == add_stmt)
4240                         break;
4241                     }
4242                 }
4243               if (i == 128)
4244                 return false;
4245               if (add_first)
4246                 *gsi = gsi_for_stmt (add_stmt);
4247             }
4248         }
4249     }
4250
4251   if (code == BIT_NOT_EXPR)
4252     *gsi = gsi_for_stmt (cond_stmt);
4253
4254   auto_vec<gimple *, 8> mul_stmts;
4255   if (code == MULT_EXPR && cast_stmt)
4256     {
4257       type = TREE_TYPE (cast_lhs);
4258       gimple *g = SSA_NAME_DEF_STMT (rhs1);
4259       if (gimple_assign_cast_p (g)
4260           && useless_type_conversion_p (type,
4261                                         TREE_TYPE (gimple_assign_rhs1 (g)))
4262           && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4263         rhs1 = gimple_assign_rhs1 (g);
4264       else
4265         {
4266           g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
4267           gsi_insert_before (gsi, g, GSI_SAME_STMT);
4268           rhs1 = gimple_assign_lhs (g);
4269           mul_stmts.quick_push (g);
4270         }
4271       if (TREE_CODE (rhs2) == INTEGER_CST)
4272         rhs2 = fold_convert (type, rhs2);
4273       else
4274         {
4275           g = SSA_NAME_DEF_STMT (rhs2);
4276           if (gimple_assign_cast_p (g)
4277               && useless_type_conversion_p (type,
4278                                             TREE_TYPE (gimple_assign_rhs1 (g)))
4279               && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
4280             rhs2 = gimple_assign_rhs1 (g);
4281           else
4282             {
4283               g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
4284               gsi_insert_before (gsi, g, GSI_SAME_STMT);
4285               rhs2 = gimple_assign_lhs (g);
4286               mul_stmts.quick_push (g);
4287             }
4288         }
4289     }
4290   tree ctype = build_complex_type (type);
4291   gcall *g = gimple_build_call_internal (code == MULT_EXPR
4292                                          ? IFN_MUL_OVERFLOW
4293                                          : code != MINUS_EXPR
4294                                          ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
4295                                          2, rhs1, rhs2);
4296   tree ctmp = make_ssa_name (ctype);
4297   gimple_call_set_lhs (g, ctmp);
4298   gsi_insert_before (gsi, g, GSI_SAME_STMT);
4299   tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
4300   gassign *g2;
4301   if (code != BIT_NOT_EXPR)
4302     {
4303       g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
4304                                 build1 (REALPART_EXPR, type, ctmp));
4305       if (maxval || cast_stmt)
4306         {
4307           gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4308           if (add_first)
4309             *gsi = gsi_for_stmt (stmt);
4310         }
4311       else
4312         gsi_replace (gsi, g2, true);
4313       if (code == MULT_EXPR)
4314         {
4315           mul_stmts.quick_push (g);
4316           mul_stmts.quick_push (g2);
4317           if (cast_stmt)
4318             {
4319               g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
4320               gsi_replace (gsi, g2, true);
4321               mul_stmts.quick_push (g2);
4322             }
4323         }
4324     }
4325   tree ovf = make_ssa_name (type);
4326   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
4327                             build1 (IMAGPART_EXPR, type, ctmp));
4328   if (code != BIT_NOT_EXPR)
4329     gsi_insert_after (gsi, g2, GSI_NEW_STMT);
4330   else
4331     gsi_insert_before (gsi, g2, GSI_SAME_STMT);
4332   if (code == MULT_EXPR)
4333     mul_stmts.quick_push (g2);
4334
4335   FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
4336     {
4337       if (is_gimple_debug (use_stmt))
4338         continue;
4339
4340       gimple *orig_use_stmt = use_stmt;
4341       int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
4342                                             maxval, NULL);
4343       if (ovf_use == 0)
4344         {
4345           gcc_assert (code != BIT_NOT_EXPR);
4346           if (maxval)
4347             {
4348               tree use_lhs = gimple_assign_lhs (use_stmt);
4349               gimple_assign_set_rhs1 (use_stmt, new_lhs);
4350               if (useless_type_conversion_p (TREE_TYPE (use_lhs),
4351                                              TREE_TYPE (new_lhs)))
4352                 gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
4353               update_stmt (use_stmt);
4354             }
4355           continue;
4356         }
4357       if (gimple_code (use_stmt) == GIMPLE_COND)
4358         {
4359           gcond *cond_stmt = as_a <gcond *> (use_stmt);
4360           gimple_cond_set_lhs (cond_stmt, ovf);
4361           gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
4362           gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4363         }
4364       else
4365         {
4366           gcc_checking_assert (is_gimple_assign (use_stmt));
4367           if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
4368             {
4369               gimple_assign_set_rhs1 (use_stmt, ovf);
4370               gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
4371               gimple_assign_set_rhs_code (use_stmt,
4372                                           ovf_use == 1 ? NE_EXPR : EQ_EXPR);
4373             }
4374           else
4375             {
4376               gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
4377                                    == COND_EXPR);
4378               tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
4379                                   boolean_type_node, ovf,
4380                                   build_int_cst (type, 0));
4381               gimple_assign_set_rhs1 (use_stmt, cond);
4382             }
4383         }
4384       update_stmt (use_stmt);
4385       if (code == MULT_EXPR && use_stmt != orig_use_stmt)
4386         {
4387           gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
4388           maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
4389                                          cfg_changed);
4390           gsi_remove (&gsi2, true);
4391           release_ssa_name (gimple_assign_lhs (orig_use_stmt));
4392         }
4393     }
4394   if (maxval)
4395     {
4396       gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
4397       gsi_remove (&gsi2, true);
4398       if (add_stmt)
4399         {
4400           gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
4401                                            new_lhs);
4402           gsi2 = gsi_for_stmt (add_stmt);
4403           gsi_replace (&gsi2, g, true);
4404         }
4405     }
4406   else if (code == BIT_NOT_EXPR)
4407     {
4408       *gsi = gsi_for_stmt (stmt);
4409       gsi_remove (gsi, true);
4410       release_ssa_name (lhs);
4411       return true;
4412     }
4413   return false;
4414 }
4415
4416 /* Return true if target has support for divmod.  */
4417
4418 static bool
4419 target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
4420 {
4421   /* If target supports hardware divmod insn, use it for divmod.  */
4422   if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
4423     return true;
4424
4425   /* Check if libfunc for divmod is available.  */
4426   rtx libfunc = optab_libfunc (divmod_optab, mode);
4427   if (libfunc != NULL_RTX)
4428     {
4429       /* If optab_handler exists for div_optab, perhaps in a wider mode,
4430          we don't want to use the libfunc even if it exists for given mode.  */
4431       machine_mode div_mode;
4432       FOR_EACH_MODE_FROM (div_mode, mode)
4433         if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
4434           return false;
4435
4436       return targetm.expand_divmod_libfunc != NULL;
4437     }
4438
4439   return false;
4440 }
4441
4442 /* Check if stmt is candidate for divmod transform.  */
4443
4444 static bool
4445 divmod_candidate_p (gassign *stmt)
4446 {
4447   tree type = TREE_TYPE (gimple_assign_lhs (stmt));
4448   machine_mode mode = TYPE_MODE (type);
4449   optab divmod_optab, div_optab;
4450
4451   if (TYPE_UNSIGNED (type))
4452     {
4453       divmod_optab = udivmod_optab;
4454       div_optab = udiv_optab;
4455     }
4456   else
4457     {
4458       divmod_optab = sdivmod_optab;
4459       div_optab = sdiv_optab;
4460     }
4461
4462   tree op1 = gimple_assign_rhs1 (stmt);
4463   tree op2 = gimple_assign_rhs2 (stmt);
4464
4465   /* Disable the transform if either is a constant, since division-by-constant
4466      may have specialized expansion.  */
4467   if (CONSTANT_CLASS_P (op1))
4468     return false;
4469
4470   if (CONSTANT_CLASS_P (op2))
4471     {
4472       if (integer_pow2p (op2))
4473         return false;
4474
4475       if (TYPE_PRECISION (type) <= HOST_BITS_PER_WIDE_INT
4476           && TYPE_PRECISION (type) <= BITS_PER_WORD)
4477         return false;
4478
4479       /* If the divisor is not power of 2 and the precision wider than
4480          HWI, expand_divmod punts on that, so in that case it is better
4481          to use divmod optab or libfunc.  Similarly if choose_multiplier
4482          might need pre/post shifts of BITS_PER_WORD or more.  */
4483     }
4484
4485   /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4486      expand using the [su]divv optabs.  */
4487   if (TYPE_OVERFLOW_TRAPS (type))
4488     return false;
4489
4490   if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
4491     return false;
4492
4493   return true;
4494 }
4495
4496 /* This function looks for:
4497    t1 = a TRUNC_DIV_EXPR b;
4498    t2 = a TRUNC_MOD_EXPR b;
4499    and transforms it to the following sequence:
4500    complex_tmp = DIVMOD (a, b);
4501    t1 = REALPART_EXPR(a);
4502    t2 = IMAGPART_EXPR(b);
4503    For conditions enabling the transform see divmod_candidate_p().
4504
4505    The pass has three parts:
4506    1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4507       other trunc_div_expr and trunc_mod_expr stmts.
4508    2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4509       to stmts vector.
4510    3) Insert DIVMOD call just before top_stmt and update entries in
4511       stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
4512       IMAGPART_EXPR for mod).  */
4513
4514 static bool
4515 convert_to_divmod (gassign *stmt)
4516 {
4517   if (stmt_can_throw_internal (cfun, stmt)
4518       || !divmod_candidate_p (stmt))
4519     return false;
4520
4521   tree op1 = gimple_assign_rhs1 (stmt);
4522   tree op2 = gimple_assign_rhs2 (stmt);
4523
4524   imm_use_iterator use_iter;
4525   gimple *use_stmt;
4526   auto_vec<gimple *> stmts;
4527
4528   gimple *top_stmt = stmt;
4529   basic_block top_bb = gimple_bb (stmt);
4530
4531   /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
4532      at-least stmt and possibly other trunc_div/trunc_mod stmts
4533      having same operands as stmt.  */
4534
4535   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
4536     {
4537       if (is_gimple_assign (use_stmt)
4538           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4539               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4540           && operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
4541           && operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
4542         {
4543           if (stmt_can_throw_internal (cfun, use_stmt))
4544             continue;
4545
4546           basic_block bb = gimple_bb (use_stmt);
4547
4548           if (bb == top_bb)
4549             {
4550               if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
4551                 top_stmt = use_stmt;
4552             }
4553           else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
4554             {
4555               top_bb = bb;
4556               top_stmt = use_stmt;
4557             }
4558         }
4559     }
4560
4561   tree top_op1 = gimple_assign_rhs1 (top_stmt);
4562   tree top_op2 = gimple_assign_rhs2 (top_stmt);
4563
4564   stmts.safe_push (top_stmt);
4565   bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);
4566
4567   /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
4568      to stmts vector. The 2nd loop will always add stmt to stmts vector, since
4569      gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4570      2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
4571
4572   FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
4573     {
4574       if (is_gimple_assign (use_stmt)
4575           && (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
4576               || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
4577           && operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
4578           && operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
4579         {
4580           if (use_stmt == top_stmt
4581               || stmt_can_throw_internal (cfun, use_stmt)
4582               || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
4583             continue;
4584
4585           stmts.safe_push (use_stmt);
4586           if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)
4587             div_seen = true;
4588         }
4589     }
4590
4591   if (!div_seen)
4592     return false;
4593
4594   /* Part 3: Create libcall to internal fn DIVMOD:
4595      divmod_tmp = DIVMOD (op1, op2).  */
4596
4597   gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
4598   tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
4599                                  call_stmt, "divmod_tmp");
4600   gimple_call_set_lhs (call_stmt, res);
4601   /* We rejected throwing statements above.  */
4602   gimple_call_set_nothrow (call_stmt, true);
4603
4604   /* Insert the call before top_stmt.  */
4605   gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
4606   gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);
4607
4608   widen_mul_stats.divmod_calls_inserted++;
4609
4610   /* Update all statements in stmts vector:
4611      lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4612      lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
4613
4614   for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
4615     {
4616       tree new_rhs;
4617
4618       switch (gimple_assign_rhs_code (use_stmt))
4619         {
4620           case TRUNC_DIV_EXPR:
4621             new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
4622             break;
4623
4624           case TRUNC_MOD_EXPR:
4625             new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
4626             break;
4627
4628           default:
4629             gcc_unreachable ();
4630         }
4631
4632       gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
4633       gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
4634       update_stmt (use_stmt);
4635     }
4636
4637   return true;
4638 }
4639
4640 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4641    its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
4642    value is true iff we converted the statement.  */
4643
4644 static bool
4645 convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
4646 {
4647   tree lhs = gimple_assign_lhs (stmt);
4648   tree stype = TREE_TYPE (lhs);
4649   tree sarg0 = gimple_assign_rhs1 (stmt);
4650   tree sarg1 = gimple_assign_rhs2 (stmt);
4651
4652   if (TREE_CODE (stype) != INTEGER_TYPE
4653       || TREE_CODE (sarg1) != INTEGER_CST
4654       || TREE_CODE (sarg0) != SSA_NAME
4655       || !tree_fits_uhwi_p (sarg1)
4656       || !has_single_use (sarg0))
4657     return false;
4658
4659   gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));
4660   if (!def)
4661     return false;
4662
4663   enum tree_code mcode = gimple_assign_rhs_code (def);
4664   if (mcode == NOP_EXPR)
4665     {
4666       tree tmp = gimple_assign_rhs1 (def);
4667       if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
4668         return false;
4669       def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
4670       if (!def)
4671         return false;
4672       mcode = gimple_assign_rhs_code (def);
4673     }
4674
4675   if (mcode != WIDEN_MULT_EXPR
4676       || gimple_bb (def) != gimple_bb (stmt))
4677     return false;
4678   tree mtype = TREE_TYPE (gimple_assign_lhs (def));
4679   if (TREE_CODE (mtype) != INTEGER_TYPE
4680       || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))
4681     return false;
4682
4683   tree mop1 = gimple_assign_rhs1 (def);
4684   tree mop2 = gimple_assign_rhs2 (def);
4685   tree optype = TREE_TYPE (mop1);
4686   bool unsignedp = TYPE_UNSIGNED (optype);
4687   unsigned int prec = TYPE_PRECISION (optype);
4688
4689   if (unsignedp != TYPE_UNSIGNED (mtype)
4690       || TYPE_PRECISION (mtype) != 2 * prec)
4691     return false;
4692
4693   unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
4694   if (bits < prec || bits >= 2 * prec)
4695     return false;
4696
4697   /* For the time being, require operands to have the same sign.  */
4698   if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))
4699     return false;
4700
4701   machine_mode mode = TYPE_MODE (optype);
4702   optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
4703   if (optab_handler (tab, mode) == CODE_FOR_nothing)
4704     return false;
4705
4706   location_t loc = gimple_location (stmt);
4707   tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
4708                                            MULT_HIGHPART_EXPR, mop1, mop2);
4709   tree highpart2 = highpart1;
4710   tree ntype = optype;
4711
4712   if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
4713     {
4714       ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
4715                                     : signed_type_for (optype);
4716       highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
4717     }
4718   if (bits > prec)
4719     highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
4720                                         RSHIFT_EXPR, highpart2,
4721                                         build_int_cst (ntype, bits - prec));
4722
4723   gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
4724   gsi_replace (gsi, new_stmt, true);
4725
4726   widen_mul_stats.highpart_mults_inserted++;
4727   return true;
4728 }
4729
4730 /* If target has spaceship<MODE>3 expander, pattern recognize
4731    <bb 2> [local count: 1073741824]:
4732    if (a_2(D) == b_3(D))
4733      goto <bb 6>; [34.00%]
4734    else
4735      goto <bb 3>; [66.00%]
4736
4737    <bb 3> [local count: 708669601]:
4738    if (a_2(D) < b_3(D))
4739      goto <bb 6>; [1.04%]
4740    else
4741      goto <bb 4>; [98.96%]
4742
4743    <bb 4> [local count: 701299439]:
4744    if (a_2(D) > b_3(D))
4745      goto <bb 5>; [48.89%]
4746    else
4747      goto <bb 6>; [51.11%]
4748
4749    <bb 5> [local count: 342865295]:
4750
4751    <bb 6> [local count: 1073741824]:
4752    and turn it into:
4753    <bb 2> [local count: 1073741824]:
4754    _1 = .SPACESHIP (a_2(D), b_3(D));
4755    if (_1 == 0)
4756      goto <bb 6>; [34.00%]
4757    else
4758      goto <bb 3>; [66.00%]
4759
4760    <bb 3> [local count: 708669601]:
4761    if (_1 == -1)
4762      goto <bb 6>; [1.04%]
4763    else
4764      goto <bb 4>; [98.96%]
4765
4766    <bb 4> [local count: 701299439]:
4767    if (_1 == 1)
4768      goto <bb 5>; [48.89%]
4769    else
4770      goto <bb 6>; [51.11%]
4771
4772    <bb 5> [local count: 342865295]:
4773
4774    <bb 6> [local count: 1073741824]:
4775    so that the backend can emit optimal comparison and
4776    conditional jump sequence.  */
4777
4778 static void
4779 optimize_spaceship (gcond *stmt)
4780 {
4781   enum tree_code code = gimple_cond_code (stmt);
4782   if (code != EQ_EXPR && code != NE_EXPR)
4783     return;
4784   tree arg1 = gimple_cond_lhs (stmt);
4785   tree arg2 = gimple_cond_rhs (stmt);
4786   if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
4787       || optab_handler (spaceship_optab,
4788                         TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
4789       || operand_equal_p (arg1, arg2, 0))
4790     return;
4791
4792   basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
4793   edge em1 = NULL, e1 = NULL, e2 = NULL;
4794   bb1 = EDGE_SUCC (bb0, 1)->dest;
4795   if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
4796     bb1 = EDGE_SUCC (bb0, 0)->dest;
4797
4798   gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
4799   if (g == NULL
4800       || !single_pred_p (bb1)
4801       || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4802           ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4803           : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4804              || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4805       || !cond_only_block_p (bb1))
4806     return;
4807
4808   enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4809                           ? LT_EXPR : GT_EXPR);
4810   switch (gimple_cond_code (g))
4811     {
4812     case LT_EXPR:
4813     case LE_EXPR:
4814       break;
4815     case GT_EXPR:
4816     case GE_EXPR:
4817       ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;
4818       break;
4819     default:
4820       return;
4821     }
4822
4823   for (int i = 0; i < 2; ++i)
4824     {
4825       /* With NaNs, </<=/>/>= are false, so we need to look for the
4826          third comparison on the false edge from whatever non-equality
4827          comparison the second comparison is.  */
4828       if (HONOR_NANS (TREE_TYPE (arg1))
4829           && (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)
4830         continue;
4831
4832       bb2 = EDGE_SUCC (bb1, i)->dest;
4833       g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
4834       if (g == NULL
4835           || !single_pred_p (bb2)
4836           || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
4837               ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
4838               : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
4839                  || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
4840           || !cond_only_block_p (bb2)
4841           || EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)
4842         continue;
4843
4844       enum tree_code ccode2
4845         = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
4846       switch (gimple_cond_code (g))
4847         {
4848         case LT_EXPR:
4849         case LE_EXPR:
4850           break;
4851         case GT_EXPR:
4852         case GE_EXPR:
4853           ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
4854           break;
4855         default:
4856           continue;
4857         }
4858       if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)
4859         continue;
4860
4861       if ((ccode == LT_EXPR)
4862           ^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
4863         {
4864           em1 = EDGE_SUCC (bb1, 1 - i);
4865           e1 = EDGE_SUCC (bb2, 0);
4866           e2 = EDGE_SUCC (bb2, 1);
4867           if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
4868             std::swap (e1, e2);
4869         }
4870       else
4871         {
4872           e1 = EDGE_SUCC (bb1, 1 - i);
4873           em1 = EDGE_SUCC (bb2, 0);
4874           e2 = EDGE_SUCC (bb2, 1);
4875           if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
4876             std::swap (em1, e2);
4877         }
4878       break;
4879     }
4880
4881   if (em1 == NULL)
4882     {
4883       if ((ccode == LT_EXPR)
4884           ^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
4885         {
4886           em1 = EDGE_SUCC (bb1, 1);
4887           e1 = EDGE_SUCC (bb1, 0);
4888           e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4889         }
4890       else
4891         {
4892           em1 = EDGE_SUCC (bb1, 0);
4893           e1 = EDGE_SUCC (bb1, 1);
4894           e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
4895         }
4896     }
4897
4898   gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 2, arg1, arg2);
4899   tree lhs = make_ssa_name (integer_type_node);
4900   gimple_call_set_lhs (gc, lhs);
4901   gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4902   gsi_insert_before (&gsi, gc, GSI_SAME_STMT);
4903
4904   gimple_cond_set_lhs (stmt, lhs);
4905   gimple_cond_set_rhs (stmt, integer_zero_node);
4906   update_stmt (stmt);
4907
4908   gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
4909   gimple_cond_set_lhs (cond, lhs);
4910   if (em1->src == bb1 && e2 != em1)
4911     {
4912       gimple_cond_set_rhs (cond, integer_minus_one_node);
4913       gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
4914                                   ? EQ_EXPR : NE_EXPR);
4915     }
4916   else
4917     {
4918       gcc_assert (e1->src == bb1 && e2 != e1);
4919       gimple_cond_set_rhs (cond, integer_one_node);
4920       gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
4921                                   ? EQ_EXPR : NE_EXPR);
4922     }
4923   update_stmt (cond);
4924
4925   if (e2 != e1 && e2 != em1)
4926     {
4927       cond = as_a <gcond *> (*gsi_last_bb (bb2));
4928       gimple_cond_set_lhs (cond, lhs);
4929       if (em1->src == bb2)
4930         gimple_cond_set_rhs (cond, integer_minus_one_node);
4931       else
4932         {
4933           gcc_assert (e1->src == bb2);
4934           gimple_cond_set_rhs (cond, integer_one_node);
4935         }
4936       gimple_cond_set_code (cond,
4937                             (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
4938       update_stmt (cond);
4939     }
4940
4941   wide_int wm1 = wi::minus_one (TYPE_PRECISION (integer_type_node));
4942   wide_int w2 = wi::two (TYPE_PRECISION (integer_type_node));
4943   value_range vr (TREE_TYPE (lhs), wm1, w2);
4944   set_range_info (lhs, vr);
4945 }
4946
4947
4948 /* Find integer multiplications where the operands are extended from
4949    smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4950    or MULT_HIGHPART_EXPR where appropriate.  */
4951
4952 namespace {
4953
4954 const pass_data pass_data_optimize_widening_mul =
4955 {
4956   GIMPLE_PASS, /* type */
4957   "widening_mul", /* name */
4958   OPTGROUP_NONE, /* optinfo_flags */
4959   TV_TREE_WIDEN_MUL, /* tv_id */
4960   PROP_ssa, /* properties_required */
4961   0, /* properties_provided */
4962   0, /* properties_destroyed */
4963   0, /* todo_flags_start */
4964   TODO_update_ssa, /* todo_flags_finish */
4965 };
4966
4967 class pass_optimize_widening_mul : public gimple_opt_pass
4968 {
4969 public:
4970   pass_optimize_widening_mul (gcc::context *ctxt)
4971     : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
4972   {}
4973
4974   /* opt_pass methods: */
4975   bool gate (function *) final override
4976     {
4977       return flag_expensive_optimizations && optimize;
4978     }
4979
4980   unsigned int execute (function *) final override;
4981
4982 }; // class pass_optimize_widening_mul
4983
4984 /* Walker class to perform the transformation in reverse dominance order. */
4985
4986 class math_opts_dom_walker : public dom_walker
4987 {
4988 public:
4989   /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4990      if walking modidifes the CFG.  */
4991
4992   math_opts_dom_walker (bool *cfg_changed_p)
4993     : dom_walker (CDI_DOMINATORS), m_last_result_set (),
4994       m_cfg_changed_p (cfg_changed_p) {}
4995
4996   /* The actual actions performed in the walk.  */
4997
4998   void after_dom_children (basic_block) final override;
4999
5000   /* Set of results of chains of multiply and add statement combinations that
5001      were not transformed into FMAs because of active deferring.  */
5002   hash_set<tree> m_last_result_set;
5003
5004   /* Pointer to a flag of the user that needs to be set if CFG has been
5005      modified.  */
5006   bool *m_cfg_changed_p;
5007 };
5008
5009 void
5010 math_opts_dom_walker::after_dom_children (basic_block bb)
5011 {
5012   gimple_stmt_iterator gsi;
5013
5014   fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);
5015
5016   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
5017     {
5018       gimple *stmt = gsi_stmt (gsi);
5019       enum tree_code code;
5020
5021       if (is_gimple_assign (stmt))
5022         {
5023           code = gimple_assign_rhs_code (stmt);
5024           switch (code)
5025             {
5026             case MULT_EXPR:
5027               if (!convert_mult_to_widen (stmt, &gsi)
5028                   && !convert_expand_mult_copysign (stmt, &gsi)
5029                   && convert_mult_to_fma (stmt,
5030                                           gimple_assign_rhs1 (stmt),
5031                                           gimple_assign_rhs2 (stmt),
5032                                           &fma_state))
5033                 {
5034                   gsi_remove (&gsi, true);
5035                   release_defs (stmt);
5036                   continue;
5037                 }
5038               match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5039               break;
5040
5041             case PLUS_EXPR:
5042             case MINUS_EXPR:
5043               if (!convert_plusminus_to_widen (&gsi, stmt, code))
5044                 match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
5045               break;
5046
5047             case BIT_NOT_EXPR:
5048               if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
5049                 continue;
5050               break;
5051
5052             case TRUNC_MOD_EXPR:
5053               convert_to_divmod (as_a<gassign *> (stmt));
5054               break;
5055
5056             case RSHIFT_EXPR:
5057               convert_mult_to_highpart (as_a<gassign *> (stmt), &gsi);
5058               break;
5059
5060             default:;
5061             }
5062         }
5063       else if (is_gimple_call (stmt))
5064         {
5065           switch (gimple_call_combined_fn (stmt))
5066             {
5067             CASE_CFN_POW:
5068               if (gimple_call_lhs (stmt)
5069                   && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
5070                   && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
5071                                  &dconst2)
5072                   && convert_mult_to_fma (stmt,
5073                                           gimple_call_arg (stmt, 0),
5074                                           gimple_call_arg (stmt, 0),
5075                                           &fma_state))
5076                 {
5077                   unlink_stmt_vdef (stmt);
5078                   if (gsi_remove (&gsi, true)
5079                       && gimple_purge_dead_eh_edges (bb))
5080                     *m_cfg_changed_p = true;
5081                   release_defs (stmt);
5082                   continue;
5083                 }
5084               break;
5085
5086             case CFN_COND_MUL:
5087               if (convert_mult_to_fma (stmt,
5088                                        gimple_call_arg (stmt, 1),
5089                                        gimple_call_arg (stmt, 2),
5090                                        &fma_state,
5091                                        gimple_call_arg (stmt, 0)))
5092
5093                 {
5094                   gsi_remove (&gsi, true);
5095                   release_defs (stmt);
5096                   continue;
5097                 }
5098               break;
5099
5100             case CFN_LAST:
5101               cancel_fma_deferring (&fma_state);
5102               break;
5103
5104             default:
5105               break;
5106             }
5107         }
5108       else if (gimple_code (stmt) == GIMPLE_COND)
5109         optimize_spaceship (as_a <gcond *> (stmt));
5110       gsi_next (&gsi);
5111     }
5112   if (fma_state.m_deferring_p
5113       && fma_state.m_initial_phi)
5114     {
5115       gcc_checking_assert (fma_state.m_last_result);
5116       if (!last_fma_candidate_feeds_initial_phi (&fma_state,
5117                                                  &m_last_result_set))
5118         cancel_fma_deferring (&fma_state);
5119       else
5120         m_last_result_set.add (fma_state.m_last_result);
5121     }
5122 }
5123
5124
5125 unsigned int
5126 pass_optimize_widening_mul::execute (function *fun)
5127 {
5128   bool cfg_changed = false;
5129
5130   memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
5131   calculate_dominance_info (CDI_DOMINATORS);
5132   renumber_gimple_stmt_uids (cfun);
5133
5134   math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));
5135
5136   statistics_counter_event (fun, "widening multiplications inserted",
5137                             widen_mul_stats.widen_mults_inserted);
5138   statistics_counter_event (fun, "widening maccs inserted",
5139                             widen_mul_stats.maccs_inserted);
5140   statistics_counter_event (fun, "fused multiply-adds inserted",
5141                             widen_mul_stats.fmas_inserted);
5142   statistics_counter_event (fun, "divmod calls inserted",
5143                             widen_mul_stats.divmod_calls_inserted);
5144   statistics_counter_event (fun, "highpart multiplications inserted",
5145                             widen_mul_stats.highpart_mults_inserted);
5146
5147   return cfg_changed ? TODO_cleanup_cfg : 0;
5148 }
5149
5150 } // anon namespace
5151
5152 gimple_opt_pass *
5153 make_pass_optimize_widening_mul (gcc::context *ctxt)
5154 {
5155   return new pass_optimize_widening_mul (ctxt);
5156 }