1 /* Global, SSA-based optimizations using mathematical identities.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* Currently, the only mini-pass in this file tries to CSE reciprocal
21 operations. These are common in sequences such as this one:
23 modulus = sqrt(x*x + y*y + z*z);
28 that can be optimized to
30 modulus = sqrt(x*x + y*y + z*z);
31 rmodulus = 1.0 / modulus;
36 We do this for loop invariant divisors, and with this pass whenever
37 we notice that a division has the same divisor multiple times.
39 Of course, like in PRE, we don't insert a division if a dominator
40 already has one. However, this cannot be done as an extension of
41 PRE for several reasons.
43 First of all, with some experiments it was found out that the
44 transformation is not always useful if there are only two divisions
45 by the same divisor. This is probably because modern processors
46 can pipeline the divisions; on older, in-order processors it should
47 still be effective to optimize two divisions by the same number.
48 We make this a param, and it shall be called N in the remainder of
51 Second, if trapping math is active, we have less freedom on where
52 to insert divisions: we can only do so in basic blocks that already
53 contain one. (If divisions don't trap, instead, we can insert
54 divisions elsewhere, which will be in blocks that are common dominators
55 of those that have the division).
57 We really don't want to compute the reciprocal unless a division will
58 be found. To do this, we won't insert the division in a basic block
59 that has less than N divisions *post-dominating* it.
61 The algorithm constructs a subset of the dominator tree, holding the
62 blocks containing the divisions and the common dominators to them,
63 and walk it twice. The first walk is in post-order, and it annotates
64 each block with the number of divisions that post-dominate it: this
65 gives information on where divisions can be inserted profitably.
66 The second walk is in pre-order, and it inserts divisions as explained
67 above, and replaces divisions by multiplications.
69 In the best case, the cost of the pass is O(n_statements). In the
70 worst-case, the cost is due to creating the dominator tree subset,
71 with a cost of O(n_basic_blocks ^ 2); however this can only happen
72 for n_statements / n_basic_blocks statements. So, the amortized cost
73 of creating the dominator tree subset is O(n_basic_blocks) and the
74 worst-case cost of the pass is O(n_statements * n_basic_blocks).
76 More practically, the cost will be small because there are few
77 divisions, and they tend to be in the same basic block, so insert_bb
78 is called very few times.
80 If we did this using domwalk.cc, an efficient implementation would have
81 to work on all the variables in a single pass, because we could not
82 work on just a subset of the dominator tree, as we do now, and the
83 cost would also be something like O(n_statements * n_basic_blocks).
84 The data structures would be more complex in order to work on all the
85 variables in a single pass. */
89 #include "coretypes.h"
96 #include "alloc-pool.h"
97 #include "tree-pass.h"
99 #include "optabs-tree.h"
100 #include "gimple-pretty-print.h"
102 #include "fold-const.h"
103 #include "gimple-iterator.h"
104 #include "gimple-fold.h"
105 #include "gimplify.h"
106 #include "gimplify-me.h"
107 #include "stor-layout.h"
108 #include "tree-cfg.h"
109 #include "tree-dfa.h"
110 #include "tree-ssa.h"
111 #include "builtins.h"
112 #include "internal-fn.h"
113 #include "case-cfn-macros.h"
114 #include "optabs-libfuncs.h"
116 #include "targhooks.h"
118 #include "tree-ssa-math-opts.h"
120 /* This structure represents one basic block that either computes a
121 division, or is a common dominator for basic block that compute a
124 /* The basic block represented by this structure. */
125 basic_block bb
= basic_block();
127 /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
129 tree recip_def
= tree();
131 /* If non-NULL, the SSA_NAME holding the definition for a squared
132 reciprocal inserted in BB. */
133 tree square_recip_def
= tree();
135 /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
136 was inserted in BB. */
137 gimple
*recip_def_stmt
= nullptr;
139 /* Pointer to a list of "struct occurrence"s for blocks dominated
141 struct occurrence
*children
= nullptr;
143 /* Pointer to the next "struct occurrence"s in the list of blocks
144 sharing a common dominator. */
145 struct occurrence
*next
= nullptr;
147 /* The number of divisions that are in BB before compute_merit. The
148 number of divisions that are in BB or post-dominate it after
150 int num_divisions
= 0;
152 /* True if the basic block has a division, false if it is a common
153 dominator for basic blocks that do. If it is false and trapping
154 math is active, BB is not a candidate for inserting a reciprocal. */
155 bool bb_has_division
= false;
157 /* Construct a struct occurrence for basic block BB, and whose
158 children list is headed by CHILDREN. */
159 occurrence (basic_block bb
, struct occurrence
*children
)
160 : bb (bb
), children (children
)
165 /* Destroy a struct occurrence and remove it from its basic block. */
171 /* Allocate memory for a struct occurrence from OCC_POOL. */
172 static void* operator new (size_t);
174 /* Return memory for a struct occurrence to OCC_POOL. */
175 static void operator delete (void*, size_t);
180 /* Number of 1.0/X ops inserted. */
183 /* Number of 1.0/FUNC ops inserted. */
189 /* Number of cexpi calls inserted. */
192 /* Number of conversions removed. */
199 /* Number of widening multiplication ops inserted. */
200 int widen_mults_inserted
;
202 /* Number of integer multiply-and-accumulate ops inserted. */
205 /* Number of fp fused multiply-add ops inserted. */
208 /* Number of divmod calls inserted. */
209 int divmod_calls_inserted
;
211 /* Number of highpart multiplication ops inserted. */
212 int highpart_mults_inserted
;
215 /* The instance of "struct occurrence" representing the highest
216 interesting block in the dominator tree. */
217 static struct occurrence
*occ_head
;
219 /* Allocation pool for getting instances of "struct occurrence". */
220 static object_allocator
<occurrence
> *occ_pool
;
222 void* occurrence::operator new (size_t n
)
224 gcc_assert (n
== sizeof(occurrence
));
225 return occ_pool
->allocate_raw ();
228 void occurrence::operator delete (void *occ
, size_t n
)
230 gcc_assert (n
== sizeof(occurrence
));
231 occ_pool
->remove_raw (occ
);
234 /* Insert NEW_OCC into our subset of the dominator tree. P_HEAD points to a
235 list of "struct occurrence"s, one per basic block, having IDOM as
236 their common dominator.
238 We try to insert NEW_OCC as deep as possible in the tree, and we also
239 insert any other block that is a common dominator for BB and one
240 block already in the tree. */
243 insert_bb (struct occurrence
*new_occ
, basic_block idom
,
244 struct occurrence
**p_head
)
246 struct occurrence
*occ
, **p_occ
;
248 for (p_occ
= p_head
; (occ
= *p_occ
) != NULL
; )
250 basic_block bb
= new_occ
->bb
, occ_bb
= occ
->bb
;
251 basic_block dom
= nearest_common_dominator (CDI_DOMINATORS
, occ_bb
, bb
);
254 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
257 occ
->next
= new_occ
->children
;
258 new_occ
->children
= occ
;
260 /* Try the next block (it may as well be dominated by BB). */
263 else if (dom
== occ_bb
)
265 /* OCC_BB dominates BB. Tail recurse to look deeper. */
266 insert_bb (new_occ
, dom
, &occ
->children
);
270 else if (dom
!= idom
)
272 gcc_assert (!dom
->aux
);
274 /* There is a dominator between IDOM and BB, add it and make
275 two children out of NEW_OCC and OCC. First, remove OCC from
281 /* None of the previous blocks has DOM as a dominator: if we tail
282 recursed, we would reexamine them uselessly. Just switch BB with
283 DOM, and go on looking for blocks dominated by DOM. */
284 new_occ
= new occurrence (dom
, new_occ
);
289 /* Nothing special, go on with the next element. */
294 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
295 new_occ
->next
= *p_head
;
299 /* Register that we found a division in BB.
300 IMPORTANCE is a measure of how much weighting to give
301 that division. Use IMPORTANCE = 2 to register a single
302 division. If the division is going to be found multiple
303 times use 1 (as it is with squares). */
306 register_division_in (basic_block bb
, int importance
)
308 struct occurrence
*occ
;
310 occ
= (struct occurrence
*) bb
->aux
;
313 occ
= new occurrence (bb
, NULL
);
314 insert_bb (occ
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), &occ_head
);
317 occ
->bb_has_division
= true;
318 occ
->num_divisions
+= importance
;
322 /* Compute the number of divisions that postdominate each block in OCC and
326 compute_merit (struct occurrence
*occ
)
328 struct occurrence
*occ_child
;
329 basic_block dom
= occ
->bb
;
331 for (occ_child
= occ
->children
; occ_child
; occ_child
= occ_child
->next
)
334 if (occ_child
->children
)
335 compute_merit (occ_child
);
338 bb
= single_noncomplex_succ (dom
);
342 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, occ_child
->bb
))
343 occ
->num_divisions
+= occ_child
->num_divisions
;
348 /* Return whether USE_STMT is a floating-point division by DEF. */
350 is_division_by (gimple
*use_stmt
, tree def
)
352 return is_gimple_assign (use_stmt
)
353 && gimple_assign_rhs_code (use_stmt
) == RDIV_EXPR
354 && gimple_assign_rhs2 (use_stmt
) == def
355 /* Do not recognize x / x as valid division, as we are getting
356 confused later by replacing all immediate uses x in such
358 && gimple_assign_rhs1 (use_stmt
) != def
359 && !stmt_can_throw_internal (cfun
, use_stmt
);
362 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
364 is_mult_by (gimple
*use_stmt
, tree def
, tree a
)
366 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
367 && gimple_assign_rhs_code (use_stmt
) == MULT_EXPR
)
369 tree op0
= gimple_assign_rhs1 (use_stmt
);
370 tree op1
= gimple_assign_rhs2 (use_stmt
);
372 return (op0
== def
&& op1
== a
)
373 || (op0
== a
&& op1
== def
);
378 /* Return whether USE_STMT is DEF * DEF. */
380 is_square_of (gimple
*use_stmt
, tree def
)
382 return is_mult_by (use_stmt
, def
, def
);
385 /* Return whether USE_STMT is a floating-point division by
388 is_division_by_square (gimple
*use_stmt
, tree def
)
390 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
391 && gimple_assign_rhs_code (use_stmt
) == RDIV_EXPR
392 && gimple_assign_rhs1 (use_stmt
) != gimple_assign_rhs2 (use_stmt
)
393 && !stmt_can_throw_internal (cfun
, use_stmt
))
395 tree denominator
= gimple_assign_rhs2 (use_stmt
);
396 if (TREE_CODE (denominator
) == SSA_NAME
)
397 return is_square_of (SSA_NAME_DEF_STMT (denominator
), def
);
402 /* Walk the subset of the dominator tree rooted at OCC, setting the
403 RECIP_DEF field to a definition of 1.0 / DEF that can be used in
404 the given basic block. The field may be left NULL, of course,
405 if it is not possible or profitable to do the optimization.
407 DEF_BSI is an iterator pointing at the statement defining DEF.
408 If RECIP_DEF is set, a dominator already has a computation that can
411 If should_insert_square_recip is set, then this also inserts
412 the square of the reciprocal immediately after the definition
413 of the reciprocal. */
416 insert_reciprocals (gimple_stmt_iterator
*def_gsi
, struct occurrence
*occ
,
417 tree def
, tree recip_def
, tree square_recip_def
,
418 int should_insert_square_recip
, int threshold
)
421 gassign
*new_stmt
, *new_square_stmt
;
422 gimple_stmt_iterator gsi
;
423 struct occurrence
*occ_child
;
426 && (occ
->bb_has_division
|| !flag_trapping_math
)
427 /* Divide by two as all divisions are counted twice in
429 && occ
->num_divisions
/ 2 >= threshold
)
431 /* Make a variable with the replacement and substitute it. */
432 type
= TREE_TYPE (def
);
433 recip_def
= create_tmp_reg (type
, "reciptmp");
434 new_stmt
= gimple_build_assign (recip_def
, RDIV_EXPR
,
435 build_one_cst (type
), def
);
437 if (should_insert_square_recip
)
439 square_recip_def
= create_tmp_reg (type
, "powmult_reciptmp");
440 new_square_stmt
= gimple_build_assign (square_recip_def
, MULT_EXPR
,
441 recip_def
, recip_def
);
444 if (occ
->bb_has_division
)
446 /* Case 1: insert before an existing division. */
447 gsi
= gsi_after_labels (occ
->bb
);
448 while (!gsi_end_p (gsi
)
449 && (!is_division_by (gsi_stmt (gsi
), def
))
450 && (!is_division_by_square (gsi_stmt (gsi
), def
)))
453 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
454 if (should_insert_square_recip
)
455 gsi_insert_before (&gsi
, new_square_stmt
, GSI_SAME_STMT
);
457 else if (def_gsi
&& occ
->bb
== gsi_bb (*def_gsi
))
459 /* Case 2: insert right after the definition. Note that this will
460 never happen if the definition statement can throw, because in
461 that case the sole successor of the statement's basic block will
462 dominate all the uses as well. */
463 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
464 if (should_insert_square_recip
)
465 gsi_insert_after (def_gsi
, new_square_stmt
, GSI_NEW_STMT
);
469 /* Case 3: insert in a basic block not containing defs/uses. */
470 gsi
= gsi_after_labels (occ
->bb
);
471 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
472 if (should_insert_square_recip
)
473 gsi_insert_before (&gsi
, new_square_stmt
, GSI_SAME_STMT
);
476 reciprocal_stats
.rdivs_inserted
++;
478 occ
->recip_def_stmt
= new_stmt
;
481 occ
->recip_def
= recip_def
;
482 occ
->square_recip_def
= square_recip_def
;
483 for (occ_child
= occ
->children
; occ_child
; occ_child
= occ_child
->next
)
484 insert_reciprocals (def_gsi
, occ_child
, def
, recip_def
,
485 square_recip_def
, should_insert_square_recip
,
489 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
490 Take as argument the use for (x * x). */
492 replace_reciprocal_squares (use_operand_p use_p
)
494 gimple
*use_stmt
= USE_STMT (use_p
);
495 basic_block bb
= gimple_bb (use_stmt
);
496 struct occurrence
*occ
= (struct occurrence
*) bb
->aux
;
498 if (optimize_bb_for_speed_p (bb
) && occ
->square_recip_def
501 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
502 gimple_assign_set_rhs_code (use_stmt
, MULT_EXPR
);
503 gimple_assign_set_rhs2 (use_stmt
, occ
->square_recip_def
);
504 SET_USE (use_p
, occ
->square_recip_def
);
505 fold_stmt_inplace (&gsi
);
506 update_stmt (use_stmt
);
511 /* Replace the division at USE_P with a multiplication by the reciprocal, if
515 replace_reciprocal (use_operand_p use_p
)
517 gimple
*use_stmt
= USE_STMT (use_p
);
518 basic_block bb
= gimple_bb (use_stmt
);
519 struct occurrence
*occ
= (struct occurrence
*) bb
->aux
;
521 if (optimize_bb_for_speed_p (bb
)
522 && occ
->recip_def
&& use_stmt
!= occ
->recip_def_stmt
)
524 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
525 gimple_assign_set_rhs_code (use_stmt
, MULT_EXPR
);
526 SET_USE (use_p
, occ
->recip_def
);
527 fold_stmt_inplace (&gsi
);
528 update_stmt (use_stmt
);
533 /* Free OCC and return one more "struct occurrence" to be freed. */
535 static struct occurrence
*
536 free_bb (struct occurrence
*occ
)
538 struct occurrence
*child
, *next
;
540 /* First get the two pointers hanging off OCC. */
542 child
= occ
->children
;
545 /* Now ensure that we don't recurse unless it is necessary. */
551 next
= free_bb (next
);
557 /* Transform sequences like
567 depending on the uses of x, r1, r2. This removes one multiplication and
568 allows the sqrt and division operations to execute in parallel.
569 DEF_GSI is the gsi of the initial division by sqrt that defines
570 DEF (x in the example above). */
573 optimize_recip_sqrt (gimple_stmt_iterator
*def_gsi
, tree def
)
576 imm_use_iterator use_iter
;
577 gimple
*stmt
= gsi_stmt (*def_gsi
);
579 tree orig_sqrt_ssa_name
= gimple_assign_rhs2 (stmt
);
580 tree div_rhs1
= gimple_assign_rhs1 (stmt
);
582 if (TREE_CODE (orig_sqrt_ssa_name
) != SSA_NAME
583 || TREE_CODE (div_rhs1
) != REAL_CST
584 || !real_equal (&TREE_REAL_CST (div_rhs1
), &dconst1
))
588 = dyn_cast
<gcall
*> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name
));
590 if (!sqrt_stmt
|| !gimple_call_lhs (sqrt_stmt
))
593 switch (gimple_call_combined_fn (sqrt_stmt
))
602 tree a
= gimple_call_arg (sqrt_stmt
, 0);
604 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
606 /* Statements that use x in x * x. */
607 auto_vec
<gimple
*> sqr_stmts
;
608 /* Statements that use x in a * x. */
609 auto_vec
<gimple
*> mult_stmts
;
610 bool has_other_use
= false;
611 bool mult_on_main_path
= false;
613 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, x
)
615 if (is_gimple_debug (use_stmt
))
617 if (is_square_of (use_stmt
, x
))
619 sqr_stmts
.safe_push (use_stmt
);
620 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
621 mult_on_main_path
= true;
623 else if (is_mult_by (use_stmt
, x
, a
))
625 mult_stmts
.safe_push (use_stmt
);
626 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
627 mult_on_main_path
= true;
630 has_other_use
= true;
633 /* In the x * x and a * x cases we just rewire stmt operands or
634 remove multiplications. In the has_other_use case we introduce
635 a multiplication so make sure we don't introduce a multiplication
636 on a path where there was none. */
637 if (has_other_use
&& !mult_on_main_path
)
640 if (sqr_stmts
.is_empty () && mult_stmts
.is_empty ())
643 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
644 to be able to compose it from the sqr and mult cases. */
645 if (has_other_use
&& (sqr_stmts
.is_empty () || mult_stmts
.is_empty ()))
650 fprintf (dump_file
, "Optimizing reciprocal sqrt multiplications of\n");
651 print_gimple_stmt (dump_file
, sqrt_stmt
, 0, TDF_NONE
);
652 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
653 fprintf (dump_file
, "\n");
656 bool delete_div
= !has_other_use
;
657 tree sqr_ssa_name
= NULL_TREE
;
658 if (!sqr_stmts
.is_empty ())
660 /* r1 = x * x. Transform the original
667 = make_temp_ssa_name (TREE_TYPE (a
), NULL
, "recip_sqrt_sqr");
671 fprintf (dump_file
, "Replacing original division\n");
672 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
673 fprintf (dump_file
, "with new division\n");
676 = gimple_build_assign (sqr_ssa_name
, gimple_assign_rhs_code (stmt
),
677 gimple_assign_rhs1 (stmt
), a
);
678 gsi_insert_before (def_gsi
, stmt
, GSI_SAME_STMT
);
679 gsi_remove (def_gsi
, true);
680 *def_gsi
= gsi_for_stmt (stmt
);
681 fold_stmt_inplace (def_gsi
);
685 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
690 FOR_EACH_VEC_ELT (sqr_stmts
, i
, sqr_stmt
)
692 gimple_stmt_iterator gsi2
= gsi_for_stmt (sqr_stmt
);
693 gimple_assign_set_rhs_from_tree (&gsi2
, sqr_ssa_name
);
694 update_stmt (sqr_stmt
);
697 if (!mult_stmts
.is_empty ())
699 /* r2 = a * x. Transform this into:
700 r2 = t (The original sqrt (a)). */
702 gimple
*mult_stmt
= NULL
;
703 FOR_EACH_VEC_ELT (mult_stmts
, i
, mult_stmt
)
705 gimple_stmt_iterator gsi2
= gsi_for_stmt (mult_stmt
);
709 fprintf (dump_file
, "Replacing squaring multiplication\n");
710 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
711 fprintf (dump_file
, "with assignment\n");
713 gimple_assign_set_rhs_from_tree (&gsi2
, orig_sqrt_ssa_name
);
714 fold_stmt_inplace (&gsi2
);
715 update_stmt (mult_stmt
);
717 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
723 /* Using the two temporaries tmp1, tmp2 from above
724 the original x is now:
726 gcc_assert (orig_sqrt_ssa_name
);
727 gcc_assert (sqr_ssa_name
);
730 = gimple_build_assign (x
, MULT_EXPR
,
731 orig_sqrt_ssa_name
, sqr_ssa_name
);
732 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
737 /* Remove the original division. */
738 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
739 gsi_remove (&gsi2
, true);
743 release_ssa_name (x
);
746 /* Look for floating-point divisions among DEF's uses, and try to
747 replace them by multiplications with the reciprocal. Add
748 as many statements computing the reciprocal as needed.
750 DEF must be a GIMPLE register of a floating-point type. */
753 execute_cse_reciprocals_1 (gimple_stmt_iterator
*def_gsi
, tree def
)
755 use_operand_p use_p
, square_use_p
;
756 imm_use_iterator use_iter
, square_use_iter
;
758 struct occurrence
*occ
;
761 int square_recip_count
= 0;
762 int sqrt_recip_count
= 0;
764 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def
)) && TREE_CODE (def
) == SSA_NAME
);
765 threshold
= targetm
.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def
)));
767 /* If DEF is a square (x * x), count the number of divisions by x.
768 If there are more divisions by x than by (DEF * DEF), prefer to optimize
769 the reciprocal of x instead of DEF. This improves cases like:
774 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
775 gimple
*def_stmt
= SSA_NAME_DEF_STMT (def
);
777 if (is_gimple_assign (def_stmt
)
778 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
779 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == SSA_NAME
780 && gimple_assign_rhs1 (def_stmt
) == gimple_assign_rhs2 (def_stmt
))
782 tree op0
= gimple_assign_rhs1 (def_stmt
);
784 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, op0
)
786 gimple
*use_stmt
= USE_STMT (use_p
);
787 if (is_division_by (use_stmt
, op0
))
792 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, def
)
794 gimple
*use_stmt
= USE_STMT (use_p
);
795 if (is_division_by (use_stmt
, def
))
797 register_division_in (gimple_bb (use_stmt
), 2);
801 if (is_square_of (use_stmt
, def
))
803 square_def
= gimple_assign_lhs (use_stmt
);
804 FOR_EACH_IMM_USE_FAST (square_use_p
, square_use_iter
, square_def
)
806 gimple
*square_use_stmt
= USE_STMT (square_use_p
);
807 if (is_division_by (square_use_stmt
, square_def
))
809 /* This is executed twice for each division by a square. */
810 register_division_in (gimple_bb (square_use_stmt
), 1);
811 square_recip_count
++;
817 /* Square reciprocals were counted twice above. */
818 square_recip_count
/= 2;
820 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
821 if (sqrt_recip_count
> square_recip_count
)
824 /* Do the expensive part only if we can hope to optimize something. */
825 if (count
+ square_recip_count
>= threshold
&& count
>= 1)
828 for (occ
= occ_head
; occ
; occ
= occ
->next
)
831 insert_reciprocals (def_gsi
, occ
, def
, NULL
, NULL
,
832 square_recip_count
, threshold
);
835 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, def
)
837 if (is_division_by (use_stmt
, def
))
839 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
840 replace_reciprocal (use_p
);
842 else if (square_recip_count
> 0 && is_square_of (use_stmt
, def
))
844 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
846 /* Find all uses of the square that are divisions and
847 * replace them by multiplications with the inverse. */
848 imm_use_iterator square_iterator
;
849 gimple
*powmult_use_stmt
= USE_STMT (use_p
);
850 tree powmult_def_name
= gimple_assign_lhs (powmult_use_stmt
);
852 FOR_EACH_IMM_USE_STMT (powmult_use_stmt
,
853 square_iterator
, powmult_def_name
)
854 FOR_EACH_IMM_USE_ON_STMT (square_use_p
, square_iterator
)
856 gimple
*powmult_use_stmt
= USE_STMT (square_use_p
);
857 if (is_division_by (powmult_use_stmt
, powmult_def_name
))
858 replace_reciprocal_squares (square_use_p
);
866 for (occ
= occ_head
; occ
; )
872 /* Return an internal function that implements the reciprocal of CALL,
873 or IFN_LAST if there is no such function that the target supports. */
876 internal_fn_reciprocal (gcall
*call
)
880 switch (gimple_call_combined_fn (call
))
891 tree_pair types
= direct_internal_fn_types (ifn
, call
);
892 if (!direct_internal_fn_supported_p (ifn
, types
, OPTIMIZE_FOR_SPEED
))
898 /* Go through all the floating-point SSA_NAMEs, and call
899 execute_cse_reciprocals_1 on each of them. */
902 const pass_data pass_data_cse_reciprocals
=
904 GIMPLE_PASS
, /* type */
906 OPTGROUP_NONE
, /* optinfo_flags */
907 TV_TREE_RECIP
, /* tv_id */
908 PROP_ssa
, /* properties_required */
909 0, /* properties_provided */
910 0, /* properties_destroyed */
911 0, /* todo_flags_start */
912 TODO_update_ssa
, /* todo_flags_finish */
915 class pass_cse_reciprocals
: public gimple_opt_pass
918 pass_cse_reciprocals (gcc::context
*ctxt
)
919 : gimple_opt_pass (pass_data_cse_reciprocals
, ctxt
)
922 /* opt_pass methods: */
923 bool gate (function
*) final override
925 return optimize
&& flag_reciprocal_math
;
927 unsigned int execute (function
*) final override
;
929 }; // class pass_cse_reciprocals
932 pass_cse_reciprocals::execute (function
*fun
)
937 occ_pool
= new object_allocator
<occurrence
> ("dominators for recip");
939 memset (&reciprocal_stats
, 0, sizeof (reciprocal_stats
));
940 calculate_dominance_info (CDI_DOMINATORS
);
941 calculate_dominance_info (CDI_POST_DOMINATORS
);
944 FOR_EACH_BB_FN (bb
, fun
)
945 gcc_assert (!bb
->aux
);
947 for (arg
= DECL_ARGUMENTS (fun
->decl
); arg
; arg
= DECL_CHAIN (arg
))
948 if (FLOAT_TYPE_P (TREE_TYPE (arg
))
949 && is_gimple_reg (arg
))
951 tree name
= ssa_default_def (fun
, arg
);
953 execute_cse_reciprocals_1 (NULL
, name
);
956 FOR_EACH_BB_FN (bb
, fun
)
960 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
963 gphi
*phi
= gsi
.phi ();
964 def
= PHI_RESULT (phi
);
965 if (! virtual_operand_p (def
)
966 && FLOAT_TYPE_P (TREE_TYPE (def
)))
967 execute_cse_reciprocals_1 (NULL
, def
);
970 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
973 gimple
*stmt
= gsi_stmt (gsi
);
975 if (gimple_has_lhs (stmt
)
976 && (def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
)) != NULL
977 && FLOAT_TYPE_P (TREE_TYPE (def
))
978 && TREE_CODE (def
) == SSA_NAME
)
980 execute_cse_reciprocals_1 (&gsi
, def
);
981 stmt
= gsi_stmt (gsi
);
982 if (flag_unsafe_math_optimizations
983 && is_gimple_assign (stmt
)
984 && gimple_assign_lhs (stmt
) == def
985 && !stmt_can_throw_internal (cfun
, stmt
)
986 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
987 optimize_recip_sqrt (&gsi
, def
);
991 if (optimize_bb_for_size_p (bb
))
994 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
995 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
998 gimple
*stmt
= gsi_stmt (gsi
);
1000 if (is_gimple_assign (stmt
)
1001 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
1003 tree arg1
= gimple_assign_rhs2 (stmt
);
1006 if (TREE_CODE (arg1
) != SSA_NAME
)
1009 stmt1
= SSA_NAME_DEF_STMT (arg1
);
1011 if (is_gimple_call (stmt1
)
1012 && gimple_call_lhs (stmt1
))
1015 imm_use_iterator ui
;
1016 use_operand_p use_p
;
1017 tree fndecl
= NULL_TREE
;
1019 gcall
*call
= as_a
<gcall
*> (stmt1
);
1020 internal_fn ifn
= internal_fn_reciprocal (call
);
1021 if (ifn
== IFN_LAST
)
1023 fndecl
= gimple_call_fndecl (call
);
1025 || !fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
1027 fndecl
= targetm
.builtin_reciprocal (fndecl
);
1032 /* Check that all uses of the SSA name are divisions,
1033 otherwise replacing the defining statement will do
1036 FOR_EACH_IMM_USE_FAST (use_p
, ui
, arg1
)
1038 gimple
*stmt2
= USE_STMT (use_p
);
1039 if (is_gimple_debug (stmt2
))
1041 if (!is_gimple_assign (stmt2
)
1042 || gimple_assign_rhs_code (stmt2
) != RDIV_EXPR
1043 || gimple_assign_rhs1 (stmt2
) == arg1
1044 || gimple_assign_rhs2 (stmt2
) != arg1
)
1053 gimple_replace_ssa_lhs (call
, arg1
);
1054 if (gimple_call_internal_p (call
) != (ifn
!= IFN_LAST
))
1056 auto_vec
<tree
, 4> args
;
1057 for (unsigned int i
= 0;
1058 i
< gimple_call_num_args (call
); i
++)
1059 args
.safe_push (gimple_call_arg (call
, i
));
1061 if (ifn
== IFN_LAST
)
1062 stmt2
= gimple_build_call_vec (fndecl
, args
);
1064 stmt2
= gimple_build_call_internal_vec (ifn
, args
);
1065 gimple_call_set_lhs (stmt2
, arg1
);
1066 gimple_move_vops (stmt2
, call
);
1067 gimple_call_set_nothrow (stmt2
,
1068 gimple_call_nothrow_p (call
));
1069 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
1070 gsi_replace (&gsi2
, stmt2
, true);
1074 if (ifn
== IFN_LAST
)
1075 gimple_call_set_fndecl (call
, fndecl
);
1077 gimple_call_set_internal_fn (call
, ifn
);
1080 reciprocal_stats
.rfuncs_inserted
++;
1082 FOR_EACH_IMM_USE_STMT (stmt
, ui
, arg1
)
1084 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1085 gimple_assign_set_rhs_code (stmt
, MULT_EXPR
);
1086 fold_stmt_inplace (&gsi
);
1094 statistics_counter_event (fun
, "reciprocal divs inserted",
1095 reciprocal_stats
.rdivs_inserted
);
1096 statistics_counter_event (fun
, "reciprocal functions inserted",
1097 reciprocal_stats
.rfuncs_inserted
);
1099 free_dominance_info (CDI_DOMINATORS
);
1100 free_dominance_info (CDI_POST_DOMINATORS
);
1108 make_pass_cse_reciprocals (gcc::context
*ctxt
)
1110 return new pass_cse_reciprocals (ctxt
);
1113 /* If NAME is the result of a type conversion, look for other
1114 equivalent dominating or dominated conversions, and replace all
1115 uses with the earliest dominating name, removing the redundant
1116 conversions. Return the prevailing name. */
1119 execute_cse_conv_1 (tree name
, bool *cfg_changed
)
1121 if (SSA_NAME_IS_DEFAULT_DEF (name
)
1122 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name
))
1125 gimple
*def_stmt
= SSA_NAME_DEF_STMT (name
);
1127 if (!gimple_assign_cast_p (def_stmt
))
1130 tree src
= gimple_assign_rhs1 (def_stmt
);
1132 if (TREE_CODE (src
) != SSA_NAME
)
1135 imm_use_iterator use_iter
;
1138 /* Find the earliest dominating def. */
1139 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1141 if (use_stmt
== def_stmt
1142 || !gimple_assign_cast_p (use_stmt
))
1145 tree lhs
= gimple_assign_lhs (use_stmt
);
1147 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1148 || (gimple_assign_rhs1 (use_stmt
)
1149 != gimple_assign_rhs1 (def_stmt
))
1150 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1154 if (gimple_bb (def_stmt
) == gimple_bb (use_stmt
))
1156 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1157 while (!gsi_end_p (gsi
) && gsi_stmt (gsi
) != def_stmt
)
1159 use_dominates
= !gsi_end_p (gsi
);
1161 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
),
1162 gimple_bb (def_stmt
)))
1163 use_dominates
= false;
1164 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (def_stmt
),
1165 gimple_bb (use_stmt
)))
1166 use_dominates
= true;
1172 std::swap (name
, lhs
);
1173 std::swap (def_stmt
, use_stmt
);
1177 /* Now go through all uses of SRC again, replacing the equivalent
1178 dominated conversions. We may replace defs that were not
1179 dominated by the then-prevailing defs when we first visited
1181 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1183 if (use_stmt
== def_stmt
1184 || !gimple_assign_cast_p (use_stmt
))
1187 tree lhs
= gimple_assign_lhs (use_stmt
);
1189 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1190 || (gimple_assign_rhs1 (use_stmt
)
1191 != gimple_assign_rhs1 (def_stmt
))
1192 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1195 basic_block use_bb
= gimple_bb (use_stmt
);
1196 if (gimple_bb (def_stmt
) == use_bb
1197 || dominated_by_p (CDI_DOMINATORS
, use_bb
, gimple_bb (def_stmt
)))
1199 sincos_stats
.conv_removed
++;
1201 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1202 replace_uses_by (lhs
, name
);
1203 if (gsi_remove (&gsi
, true)
1204 && gimple_purge_dead_eh_edges (use_bb
))
1205 *cfg_changed
= true;
1206 release_defs (use_stmt
);
1213 /* Records an occurrence at statement USE_STMT in the vector of trees
1214 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1215 is not yet initialized. Returns true if the occurrence was pushed on
1216 the vector. Adjusts *TOP_BB to be the basic block dominating all
1217 statements in the vector. */
1220 maybe_record_sincos (vec
<gimple
*> *stmts
,
1221 basic_block
*top_bb
, gimple
*use_stmt
)
1223 basic_block use_bb
= gimple_bb (use_stmt
);
1225 && (*top_bb
== use_bb
1226 || dominated_by_p (CDI_DOMINATORS
, use_bb
, *top_bb
)))
1227 stmts
->safe_push (use_stmt
);
1229 || dominated_by_p (CDI_DOMINATORS
, *top_bb
, use_bb
))
1231 stmts
->safe_push (use_stmt
);
1240 /* Look for sin, cos and cexpi calls with the same argument NAME and
1241 create a single call to cexpi CSEing the result in this case.
1242 We first walk over all immediate uses of the argument collecting
1243 statements that we can CSE in a vector and in a second pass replace
1244 the statement rhs with a REALPART or IMAGPART expression on the
1245 result of the cexpi call we insert before the use statement that
1246 dominates all other candidates. */
1249 execute_cse_sincos_1 (tree name
)
1251 gimple_stmt_iterator gsi
;
1252 imm_use_iterator use_iter
;
1253 tree fndecl
, res
, type
= NULL_TREE
;
1254 gimple
*def_stmt
, *use_stmt
, *stmt
;
1255 int seen_cos
= 0, seen_sin
= 0, seen_cexpi
= 0;
1256 auto_vec
<gimple
*> stmts
;
1257 basic_block top_bb
= NULL
;
1259 bool cfg_changed
= false;
1261 name
= execute_cse_conv_1 (name
, &cfg_changed
);
1263 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, name
)
1265 if (gimple_code (use_stmt
) != GIMPLE_CALL
1266 || !gimple_call_lhs (use_stmt
))
1269 switch (gimple_call_combined_fn (use_stmt
))
1272 seen_cos
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1276 seen_sin
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1280 seen_cexpi
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1287 tree t
= mathfn_built_in_type (gimple_call_combined_fn (use_stmt
));
1291 t
= TREE_TYPE (name
);
1293 /* This checks that NAME has the right type in the first round,
1294 and, in subsequent rounds, that the built_in type is the same
1295 type, or a compatible type. */
1296 if (type
!= t
&& !types_compatible_p (type
, t
))
1299 if (seen_cos
+ seen_sin
+ seen_cexpi
<= 1)
1302 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1303 the name def statement. */
1304 fndecl
= mathfn_built_in (type
, BUILT_IN_CEXPI
);
1307 stmt
= gimple_build_call (fndecl
, 1, name
);
1308 res
= make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl
)), stmt
, "sincostmp");
1309 gimple_call_set_lhs (stmt
, res
);
1311 def_stmt
= SSA_NAME_DEF_STMT (name
);
1312 if (!SSA_NAME_IS_DEFAULT_DEF (name
)
1313 && gimple_code (def_stmt
) != GIMPLE_PHI
1314 && gimple_bb (def_stmt
) == top_bb
)
1316 gsi
= gsi_for_stmt (def_stmt
);
1317 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1321 gsi
= gsi_after_labels (top_bb
);
1322 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1324 sincos_stats
.inserted
++;
1326 /* And adjust the recorded old call sites. */
1327 for (i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
1331 switch (gimple_call_combined_fn (use_stmt
))
1334 rhs
= fold_build1 (REALPART_EXPR
, type
, res
);
1338 rhs
= fold_build1 (IMAGPART_EXPR
, type
, res
);
1349 /* Replace call with a copy. */
1350 stmt
= gimple_build_assign (gimple_call_lhs (use_stmt
), rhs
);
1352 gsi
= gsi_for_stmt (use_stmt
);
1353 gsi_replace (&gsi
, stmt
, true);
1354 if (gimple_purge_dead_eh_edges (gimple_bb (stmt
)))
/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3
1390 /* The following table is an efficient representation of an
1391 "optimal power tree". For each value, i, the corresponding
1392 value, j, in the table states than an optimal evaluation
1393 sequence for calculating pow(x,i) can be found by evaluating
1394 pow(x,j)*pow(x,i-j). An optimal power tree for the first
1395 100 integers is given in Knuth's "Seminumerical algorithms". */
1397 static const unsigned char powi_table
[POWI_TABLE_SIZE
] =
1399 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1400 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1401 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1402 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1403 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1404 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1405 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1406 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1407 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1408 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1409 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1410 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1411 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1412 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1413 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1414 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1415 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1416 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1417 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1418 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1419 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1420 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1421 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1422 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1423 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1424 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1425 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1426 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1427 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1428 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1429 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1430 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1434 /* Return the number of multiplications required to calculate
1435 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1436 subroutine of powi_cost. CACHE is an array indicating
1437 which exponents have already been calculated. */
1440 powi_lookup_cost (unsigned HOST_WIDE_INT n
, bool *cache
)
1442 /* If we've already calculated this exponent, then this evaluation
1443 doesn't require any additional multiplications. */
1448 return powi_lookup_cost (n
- powi_table
[n
], cache
)
1449 + powi_lookup_cost (powi_table
[n
], cache
) + 1;
1452 /* Return the number of multiplications required to calculate
1453 powi(x,n) for an arbitrary x, given the exponent N. This
1454 function needs to be kept in sync with powi_as_mults below. */
1457 powi_cost (HOST_WIDE_INT n
)
1459 bool cache
[POWI_TABLE_SIZE
];
1460 unsigned HOST_WIDE_INT digit
;
1461 unsigned HOST_WIDE_INT val
;
1467 /* Ignore the reciprocal when calculating the cost. */
1470 /* Initialize the exponent cache. */
1471 memset (cache
, 0, POWI_TABLE_SIZE
* sizeof (bool));
1476 while (val
>= POWI_TABLE_SIZE
)
1480 digit
= val
& ((1 << POWI_WINDOW_SIZE
) - 1);
1481 result
+= powi_lookup_cost (digit
, cache
)
1482 + POWI_WINDOW_SIZE
+ 1;
1483 val
>>= POWI_WINDOW_SIZE
;
1492 return result
+ powi_lookup_cost (val
, cache
);
1495 /* Recursive subroutine of powi_as_mults. This function takes the
1496 array, CACHE, of already calculated exponents and an exponent N and
1497 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1500 powi_as_mults_1 (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1501 unsigned HOST_WIDE_INT n
, tree
*cache
)
1503 tree op0
, op1
, ssa_target
;
1504 unsigned HOST_WIDE_INT digit
;
1507 if (n
< POWI_TABLE_SIZE
&& cache
[n
])
1510 ssa_target
= make_temp_ssa_name (type
, NULL
, "powmult");
1512 if (n
< POWI_TABLE_SIZE
)
1514 cache
[n
] = ssa_target
;
1515 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- powi_table
[n
], cache
);
1516 op1
= powi_as_mults_1 (gsi
, loc
, type
, powi_table
[n
], cache
);
1520 digit
= n
& ((1 << POWI_WINDOW_SIZE
) - 1);
1521 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- digit
, cache
);
1522 op1
= powi_as_mults_1 (gsi
, loc
, type
, digit
, cache
);
1526 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
>> 1, cache
);
1530 mult_stmt
= gimple_build_assign (ssa_target
, MULT_EXPR
, op0
, op1
);
1531 gimple_set_location (mult_stmt
, loc
);
1532 gsi_insert_before (gsi
, mult_stmt
, GSI_SAME_STMT
);
1537 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1538 This function needs to be kept in sync with powi_cost above. */
1541 powi_as_mults (gimple_stmt_iterator
*gsi
, location_t loc
,
1542 tree arg0
, HOST_WIDE_INT n
)
1544 tree cache
[POWI_TABLE_SIZE
], result
, type
= TREE_TYPE (arg0
);
1549 return build_one_cst (type
);
1551 memset (cache
, 0, sizeof (cache
));
1554 result
= powi_as_mults_1 (gsi
, loc
, type
, absu_hwi (n
), cache
);
1558 /* If the original exponent was negative, reciprocate the result. */
1559 target
= make_temp_ssa_name (type
, NULL
, "powmult");
1560 div_stmt
= gimple_build_assign (target
, RDIV_EXPR
,
1561 build_real (type
, dconst1
), result
);
1562 gimple_set_location (div_stmt
, loc
);
1563 gsi_insert_before (gsi
, div_stmt
, GSI_SAME_STMT
);
1568 /* ARG0 and N are the two arguments to a powi builtin in GSI with
1569 location info LOC. If the arguments are appropriate, create an
1570 equivalent sequence of statements prior to GSI using an optimal
1571 number of multiplications, and return an expession holding the
1575 gimple_expand_builtin_powi (gimple_stmt_iterator
*gsi
, location_t loc
,
1576 tree arg0
, HOST_WIDE_INT n
)
1578 if ((n
>= -1 && n
<= 2)
1579 || (optimize_function_for_speed_p (cfun
)
1580 && powi_cost (n
) <= POWI_MAX_MULTS
))
1581 return powi_as_mults (gsi
, loc
, arg0
, n
);
1586 /* Build a gimple call statement that calls FN with argument ARG.
1587 Set the lhs of the call statement to a fresh SSA name. Insert the
1588 statement prior to GSI's current position, and return the fresh
1592 build_and_insert_call (gimple_stmt_iterator
*gsi
, location_t loc
,
1598 call_stmt
= gimple_build_call (fn
, 1, arg
);
1599 ssa_target
= make_temp_ssa_name (TREE_TYPE (arg
), NULL
, "powroot");
1600 gimple_set_lhs (call_stmt
, ssa_target
);
1601 gimple_set_location (call_stmt
, loc
);
1602 gsi_insert_before (gsi
, call_stmt
, GSI_SAME_STMT
);
1607 /* Build a gimple binary operation with the given CODE and arguments
1608 ARG0, ARG1, assigning the result to a new SSA name for variable
1609 TARGET. Insert the statement prior to GSI's current position, and
1610 return the fresh SSA name.*/
1613 build_and_insert_binop (gimple_stmt_iterator
*gsi
, location_t loc
,
1614 const char *name
, enum tree_code code
,
1615 tree arg0
, tree arg1
)
1617 tree result
= make_temp_ssa_name (TREE_TYPE (arg0
), NULL
, name
);
1618 gassign
*stmt
= gimple_build_assign (result
, code
, arg0
, arg1
);
1619 gimple_set_location (stmt
, loc
);
1620 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1624 /* Build a gimple reference operation with the given CODE and argument
1625 ARG, assigning the result to a new SSA name of TYPE with NAME.
1626 Insert the statement prior to GSI's current position, and return
1627 the fresh SSA name. */
1630 build_and_insert_ref (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1631 const char *name
, enum tree_code code
, tree arg0
)
1633 tree result
= make_temp_ssa_name (type
, NULL
, name
);
1634 gimple
*stmt
= gimple_build_assign (result
, build1 (code
, type
, arg0
));
1635 gimple_set_location (stmt
, loc
);
1636 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1640 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1641 prior to GSI's current position, and return the fresh SSA name. */
1644 build_and_insert_cast (gimple_stmt_iterator
*gsi
, location_t loc
,
1645 tree type
, tree val
)
1647 tree result
= make_ssa_name (type
);
1648 gassign
*stmt
= gimple_build_assign (result
, NOP_EXPR
, val
);
1649 gimple_set_location (stmt
, loc
);
1650 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
/* Bookkeeping for synthesizing POW as a product of sqrt chains.  */
struct pow_synth_sqrt_info
{
  /* factors[i] is true iff sqrt applied (i+1) times to x participates
     in the product, i.e. the a[i] coefficient of the 0.5**(i+1) term.  */
  bool *factors;
  /* Length of the deepest chain of sqrt calls needed.  */
  unsigned int deepest;
  /* Number of multiplications needed to combine the sqrt chains.  */
  unsigned int num_mults;
};
1661 /* Return true iff the real value C can be represented as a
1662 sum of powers of 0.5 up to N. That is:
1663 C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
1664 Record in INFO the various parameters of the synthesis algorithm such
1665 as the factors a[i], the maximum 0.5 power and the number of
1666 multiplications that will be required. */
1669 representable_as_half_series_p (REAL_VALUE_TYPE c
, unsigned n
,
1670 struct pow_synth_sqrt_info
*info
)
1672 REAL_VALUE_TYPE factor
= dconsthalf
;
1673 REAL_VALUE_TYPE remainder
= c
;
1676 info
->num_mults
= 0;
1677 memset (info
->factors
, 0, n
* sizeof (bool));
1679 for (unsigned i
= 0; i
< n
; i
++)
1681 REAL_VALUE_TYPE res
;
1683 /* If something inexact happened bail out now. */
1684 if (real_arithmetic (&res
, MINUS_EXPR
, &remainder
, &factor
))
1687 /* We have hit zero. The number is representable as a sum
1688 of powers of 0.5. */
1689 if (real_equal (&res
, &dconst0
))
1691 info
->factors
[i
] = true;
1692 info
->deepest
= i
+ 1;
1695 else if (!REAL_VALUE_NEGATIVE (res
))
1698 info
->factors
[i
] = true;
1702 info
->factors
[i
] = false;
1704 real_arithmetic (&factor
, MULT_EXPR
, &factor
, &dconsthalf
);
1709 /* Return the tree corresponding to FN being applied
1710 to ARG N times at GSI and LOC.
1711 Look up previous results from CACHE if need be.
1712 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
1715 get_fn_chain (tree arg
, unsigned int n
, gimple_stmt_iterator
*gsi
,
1716 tree fn
, location_t loc
, tree
*cache
)
1718 tree res
= cache
[n
];
1721 tree prev
= get_fn_chain (arg
, n
- 1, gsi
, fn
, loc
, cache
);
1722 res
= build_and_insert_call (gsi
, loc
, fn
, prev
);
/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   "foo (foo (x))".  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  if (n == 0)
    fprintf (stream, "%s", arg);
  else
    {
      fprintf (stream, "%s (", fname);
      print_nested_fn (stream, fname, arg, n - 1);
      fprintf (stream, ")");
    }
}
1747 /* Print to STREAM the fractional sequence of sqrt chains
1748 applied to ARG, described by INFO. Used for the dump file. */
1751 dump_fractional_sqrt_sequence (FILE *stream
, const char *arg
,
1752 struct pow_synth_sqrt_info
*info
)
1754 for (unsigned int i
= 0; i
< info
->deepest
; i
++)
1756 bool is_set
= info
->factors
[i
];
1759 print_nested_fn (stream
, "sqrt", arg
, i
+ 1);
1760 if (i
!= info
->deepest
- 1)
1761 fprintf (stream
, " * ");
1766 /* Print to STREAM a representation of raising ARG to an integer
1767 power N. Used for the dump file. */
1770 dump_integer_part (FILE *stream
, const char* arg
, HOST_WIDE_INT n
)
1773 fprintf (stream
, "powi (%s, " HOST_WIDE_INT_PRINT_DEC
")", arg
, n
);
1775 fprintf (stream
, "%s", arg
);
1778 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1779 square roots. Place at GSI and LOC. Limit the maximum depth
1780 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1781 result of the expanded sequence or NULL_TREE if the expansion failed.
1783 This routine assumes that ARG1 is a real number with a fractional part
1784 (the integer exponent case will have been handled earlier in
1785 gimple_expand_builtin_pow).
1788 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1789 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1790 FRAC_PART == ARG1 - WHOLE_PART:
1791 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1792 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1793 if it can be expressed as such, that is if FRAC_PART satisfies:
1794 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1795 where integer a[i] is either 0 or 1.
1798 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1799 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1801 For ARG1 < 0.0 there are two approaches:
1802 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1803 is calculated as above.
1806 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1807 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1809 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1810 FRAC_PART := ARG1 - WHOLE_PART
1811 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1813 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1814 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1816 For ARG1 < 0.0 we choose between (A) and (B) depending on
1817 how many multiplications we'd have to do.
1818 So, for the example in (B): POW (x, -5.875), if we were to
1819 follow algorithm (A) we would produce:
1820 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1821 which contains more multiplications than approach (B).
1823 Hopefully, this approach will eliminate potentially expensive POW library
1824 calls when unsafe floating point math is enabled and allow the compiler to
1825 further optimise the multiplies, square roots and divides produced by this
1829 expand_pow_as_sqrts (gimple_stmt_iterator
*gsi
, location_t loc
,
1830 tree arg0
, tree arg1
, HOST_WIDE_INT max_depth
)
1832 tree type
= TREE_TYPE (arg0
);
1833 machine_mode mode
= TYPE_MODE (type
);
1834 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
1835 bool one_over
= true;
1840 if (TREE_CODE (arg1
) != REAL_CST
)
1843 REAL_VALUE_TYPE exp_init
= TREE_REAL_CST (arg1
);
1845 gcc_assert (max_depth
> 0);
1846 tree
*cache
= XALLOCAVEC (tree
, max_depth
+ 1);
1848 struct pow_synth_sqrt_info synth_info
;
1849 synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1850 synth_info
.deepest
= 0;
1851 synth_info
.num_mults
= 0;
1853 bool neg_exp
= REAL_VALUE_NEGATIVE (exp_init
);
1854 REAL_VALUE_TYPE exp
= real_value_abs (&exp_init
);
1856 /* The whole and fractional parts of exp. */
1857 REAL_VALUE_TYPE whole_part
;
1858 REAL_VALUE_TYPE frac_part
;
1860 real_floor (&whole_part
, mode
, &exp
);
1861 real_arithmetic (&frac_part
, MINUS_EXPR
, &exp
, &whole_part
);
1864 REAL_VALUE_TYPE ceil_whole
= dconst0
;
1865 REAL_VALUE_TYPE ceil_fract
= dconst0
;
1869 real_ceil (&ceil_whole
, mode
, &exp
);
1870 real_arithmetic (&ceil_fract
, MINUS_EXPR
, &ceil_whole
, &exp
);
1873 if (!representable_as_half_series_p (frac_part
, max_depth
, &synth_info
))
1876 /* Check whether it's more profitable to not use 1.0 / ... */
1879 struct pow_synth_sqrt_info alt_synth_info
;
1880 alt_synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1881 alt_synth_info
.deepest
= 0;
1882 alt_synth_info
.num_mults
= 0;
1884 if (representable_as_half_series_p (ceil_fract
, max_depth
,
1886 && alt_synth_info
.deepest
<= synth_info
.deepest
1887 && alt_synth_info
.num_mults
< synth_info
.num_mults
)
1889 whole_part
= ceil_whole
;
1890 frac_part
= ceil_fract
;
1891 synth_info
.deepest
= alt_synth_info
.deepest
;
1892 synth_info
.num_mults
= alt_synth_info
.num_mults
;
1893 memcpy (synth_info
.factors
, alt_synth_info
.factors
,
1894 (max_depth
+ 1) * sizeof (bool));
1899 HOST_WIDE_INT n
= real_to_integer (&whole_part
);
1900 REAL_VALUE_TYPE cint
;
1901 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
1903 if (!real_identical (&whole_part
, &cint
))
1906 if (powi_cost (n
) + synth_info
.num_mults
> POWI_MAX_MULTS
)
1909 memset (cache
, 0, (max_depth
+ 1) * sizeof (tree
));
1911 tree integer_res
= n
== 0 ? build_real (type
, dconst1
) : arg0
;
1913 /* Calculate the integer part of the exponent. */
1916 integer_res
= gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
1925 real_to_decimal (string
, &exp_init
, sizeof (string
), 0, 1);
1926 fprintf (dump_file
, "synthesizing pow (x, %s) as:\n", string
);
1932 fprintf (dump_file
, "1.0 / (");
1933 dump_integer_part (dump_file
, "x", n
);
1935 fprintf (dump_file
, " * ");
1936 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1937 fprintf (dump_file
, ")");
1941 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1942 fprintf (dump_file
, " / (");
1943 dump_integer_part (dump_file
, "x", n
);
1944 fprintf (dump_file
, ")");
1949 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1951 fprintf (dump_file
, " * ");
1952 dump_integer_part (dump_file
, "x", n
);
1955 fprintf (dump_file
, "\ndeepest sqrt chain: %d\n", synth_info
.deepest
);
1959 tree fract_res
= NULL_TREE
;
1962 /* Calculate the fractional part of the exponent. */
1963 for (unsigned i
= 0; i
< synth_info
.deepest
; i
++)
1965 if (synth_info
.factors
[i
])
1967 tree sqrt_chain
= get_fn_chain (arg0
, i
+ 1, gsi
, sqrtfn
, loc
, cache
);
1970 fract_res
= sqrt_chain
;
1973 fract_res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1974 fract_res
, sqrt_chain
);
1978 tree res
= NULL_TREE
;
1985 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1986 fract_res
, integer_res
);
1990 res
= build_and_insert_binop (gsi
, loc
, "powrootrecip", RDIV_EXPR
,
1991 build_real (type
, dconst1
), res
);
1995 res
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
1996 fract_res
, integer_res
);
2000 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2001 fract_res
, integer_res
);
2005 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
2006 with location info LOC. If possible, create an equivalent and
2007 less expensive sequence of statements prior to GSI, and return an
2008 expession holding the result. */
2011 gimple_expand_builtin_pow (gimple_stmt_iterator
*gsi
, location_t loc
,
2012 tree arg0
, tree arg1
)
2014 REAL_VALUE_TYPE c
, cint
, dconst1_3
, dconst1_4
, dconst1_6
;
2015 REAL_VALUE_TYPE c2
, dconst3
;
2017 tree type
, sqrtfn
, cbrtfn
, sqrt_arg0
, result
, cbrt_x
, powi_cbrt_x
;
2019 bool speed_p
= optimize_bb_for_speed_p (gsi_bb (*gsi
));
2020 bool hw_sqrt_exists
, c_is_int
, c2_is_int
;
2022 dconst1_4
= dconst1
;
2023 SET_REAL_EXP (&dconst1_4
, REAL_EXP (&dconst1_4
) - 2);
2025 /* If the exponent isn't a constant, there's nothing of interest
2027 if (TREE_CODE (arg1
) != REAL_CST
)
2030 /* Don't perform the operation if flag_signaling_nans is on
2031 and the operand is a signaling NaN. */
2032 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1
)))
2033 && ((TREE_CODE (arg0
) == REAL_CST
2034 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0
)))
2035 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1
))))
2038 /* If the exponent is equivalent to an integer, expand to an optimal
2039 multiplication sequence when profitable. */
2040 c
= TREE_REAL_CST (arg1
);
2041 n
= real_to_integer (&c
);
2042 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2043 c_is_int
= real_identical (&c
, &cint
);
2046 && ((n
>= -1 && n
<= 2)
2047 || (flag_unsafe_math_optimizations
2049 && powi_cost (n
) <= POWI_MAX_MULTS
)))
2050 return gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
2052 /* Attempt various optimizations using sqrt and cbrt. */
2053 type
= TREE_TYPE (arg0
);
2054 mode
= TYPE_MODE (type
);
2055 sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
2057 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2058 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2061 && real_equal (&c
, &dconsthalf
)
2062 && !HONOR_SIGNED_ZEROS (mode
))
2063 return build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2065 hw_sqrt_exists
= optab_handler (sqrt_optab
, mode
) != CODE_FOR_nothing
;
2067 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2068 optimizations since 1./3. is not exactly representable. If x
2069 is negative and finite, the correct value of pow(x,1./3.) is
2070 a NaN with the "invalid" exception raised, because the value
2071 of 1./3. actually has an even denominator. The correct value
2072 of cbrt(x) is a negative real value. */
2073 cbrtfn
= mathfn_built_in (type
, BUILT_IN_CBRT
);
2074 dconst1_3
= real_value_truncate (mode
, dconst_third ());
2076 if (flag_unsafe_math_optimizations
2078 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2079 && real_equal (&c
, &dconst1_3
))
2080 return build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2082 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2083 if we don't have a hardware sqrt insn. */
2084 dconst1_6
= dconst1_3
;
2085 SET_REAL_EXP (&dconst1_6
, REAL_EXP (&dconst1_6
) - 1);
2087 if (flag_unsafe_math_optimizations
2090 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2093 && real_equal (&c
, &dconst1_6
))
2096 sqrt_arg0
= build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2099 return build_and_insert_call (gsi
, loc
, cbrtfn
, sqrt_arg0
);
2103 /* Attempt to expand the POW as a product of square root chains.
2104 Expand the 0.25 case even when otpimising for size. */
2105 if (flag_unsafe_math_optimizations
2108 && (speed_p
|| real_equal (&c
, &dconst1_4
))
2109 && !HONOR_SIGNED_ZEROS (mode
))
2111 unsigned int max_depth
= speed_p
2112 ? param_max_pow_sqrt_depth
2115 tree expand_with_sqrts
2116 = expand_pow_as_sqrts (gsi
, loc
, arg0
, arg1
, max_depth
);
2118 if (expand_with_sqrts
)
2119 return expand_with_sqrts
;
2122 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst2
);
2123 n
= real_to_integer (&c2
);
2124 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2125 c2_is_int
= real_identical (&c2
, &cint
);
2127 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2129 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2130 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2132 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2133 different from pow(x, 1./3.) due to rounding and behavior with
2134 negative x, we need to constrain this transformation to unsafe
2135 math and positive x or finite math. */
2136 real_from_integer (&dconst3
, VOIDmode
, 3, SIGNED
);
2137 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst3
);
2138 real_round (&c2
, mode
, &c2
);
2139 n
= real_to_integer (&c2
);
2140 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2141 real_arithmetic (&c2
, RDIV_EXPR
, &cint
, &dconst3
);
2142 real_convert (&c2
, mode
, &c2
);
2144 if (flag_unsafe_math_optimizations
2146 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2147 && real_identical (&c2
, &c
)
2149 && optimize_function_for_speed_p (cfun
)
2150 && powi_cost (n
/ 3) <= POWI_MAX_MULTS
)
2152 tree powi_x_ndiv3
= NULL_TREE
;
2154 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2155 possible or profitable, give up. Skip the degenerate case when
2156 abs(n) < 3, where the result is always 1. */
2157 if (absu_hwi (n
) >= 3)
2159 powi_x_ndiv3
= gimple_expand_builtin_powi (gsi
, loc
, arg0
,
2165 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2166 as that creates an unnecessary variable. Instead, just produce
2167 either cbrt(x) or cbrt(x) * cbrt(x). */
2168 cbrt_x
= build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2170 if (absu_hwi (n
) % 3 == 1)
2171 powi_cbrt_x
= cbrt_x
;
2173 powi_cbrt_x
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2176 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2177 if (absu_hwi (n
) < 3)
2178 result
= powi_cbrt_x
;
2180 result
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2181 powi_x_ndiv3
, powi_cbrt_x
);
2183 /* If n is negative, reciprocate the result. */
2185 result
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
2186 build_real (type
, dconst1
), result
);
2191 /* No optimizations succeeded. */
2195 /* ARG is the argument to a cabs builtin call in GSI with location info
2196 LOC. Create a sequence of statements prior to GSI that calculates
2197 sqrt(R*R + I*I), where R and I are the real and imaginary components
2198 of ARG, respectively. Return an expression holding the result. */
2201 gimple_expand_builtin_cabs (gimple_stmt_iterator
*gsi
, location_t loc
, tree arg
)
2203 tree real_part
, imag_part
, addend1
, addend2
, sum
, result
;
2204 tree type
= TREE_TYPE (TREE_TYPE (arg
));
2205 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
2206 machine_mode mode
= TYPE_MODE (type
);
2208 if (!flag_unsafe_math_optimizations
2209 || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi
)))
2211 || optab_handler (sqrt_optab
, mode
) == CODE_FOR_nothing
)
2214 real_part
= build_and_insert_ref (gsi
, loc
, type
, "cabs",
2215 REALPART_EXPR
, arg
);
2216 addend1
= build_and_insert_binop (gsi
, loc
, "cabs", MULT_EXPR
,
2217 real_part
, real_part
);
2218 imag_part
= build_and_insert_ref (gsi
, loc
, type
, "cabs",
2219 IMAGPART_EXPR
, arg
);
2220 addend2
= build_and_insert_binop (gsi
, loc
, "cabs", MULT_EXPR
,
2221 imag_part
, imag_part
);
2222 sum
= build_and_insert_binop (gsi
, loc
, "cabs", PLUS_EXPR
, addend1
, addend2
);
2223 result
= build_and_insert_call (gsi
, loc
, sqrtfn
, sum
);
2228 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2229 on the SSA_NAME argument of each of them. */
2233 const pass_data pass_data_cse_sincos
=
2235 GIMPLE_PASS
, /* type */
2236 "sincos", /* name */
2237 OPTGROUP_NONE
, /* optinfo_flags */
2238 TV_TREE_SINCOS
, /* tv_id */
2239 PROP_ssa
, /* properties_required */
2240 0, /* properties_provided */
2241 0, /* properties_destroyed */
2242 0, /* todo_flags_start */
2243 TODO_update_ssa
, /* todo_flags_finish */
2246 class pass_cse_sincos
: public gimple_opt_pass
2249 pass_cse_sincos (gcc::context
*ctxt
)
2250 : gimple_opt_pass (pass_data_cse_sincos
, ctxt
)
2253 /* opt_pass methods: */
2254 bool gate (function
*) final override
2259 unsigned int execute (function
*) final override
;
2261 }; // class pass_cse_sincos
2264 pass_cse_sincos::execute (function
*fun
)
2267 bool cfg_changed
= false;
2269 calculate_dominance_info (CDI_DOMINATORS
);
2270 memset (&sincos_stats
, 0, sizeof (sincos_stats
));
2272 FOR_EACH_BB_FN (bb
, fun
)
2274 gimple_stmt_iterator gsi
;
2276 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2278 gimple
*stmt
= gsi_stmt (gsi
);
2280 if (is_gimple_call (stmt
)
2281 && gimple_call_lhs (stmt
))
2284 switch (gimple_call_combined_fn (stmt
))
2289 arg
= gimple_call_arg (stmt
, 0);
2290 /* Make sure we have either sincos or cexp. */
2291 if (!targetm
.libc_has_function (function_c99_math_complex
,
2293 && !targetm
.libc_has_function (function_sincos
,
2297 if (TREE_CODE (arg
) == SSA_NAME
)
2298 cfg_changed
|= execute_cse_sincos_1 (arg
);
2307 statistics_counter_event (fun
, "sincos statements inserted",
2308 sincos_stats
.inserted
);
2309 statistics_counter_event (fun
, "conv statements removed",
2310 sincos_stats
.conv_removed
);
2312 return cfg_changed
? TODO_cleanup_cfg
: 0;
2318 make_pass_cse_sincos (gcc::context
*ctxt
)
2320 return new pass_cse_sincos (ctxt
);
2323 /* Expand powi(x,n) into an optimal number of multiplies, when n is a constant.
2324 Also expand CABS. */
2327 const pass_data pass_data_expand_powcabs
=
2329 GIMPLE_PASS
, /* type */
2330 "powcabs", /* name */
2331 OPTGROUP_NONE
, /* optinfo_flags */
2332 TV_TREE_POWCABS
, /* tv_id */
2333 PROP_ssa
, /* properties_required */
2334 PROP_gimple_opt_math
, /* properties_provided */
2335 0, /* properties_destroyed */
2336 0, /* todo_flags_start */
2337 TODO_update_ssa
, /* todo_flags_finish */
2340 class pass_expand_powcabs
: public gimple_opt_pass
2343 pass_expand_powcabs (gcc::context
*ctxt
)
2344 : gimple_opt_pass (pass_data_expand_powcabs
, ctxt
)
2347 /* opt_pass methods: */
2348 bool gate (function
*) final override
2353 unsigned int execute (function
*) final override
;
2355 }; // class pass_expand_powcabs
2358 pass_expand_powcabs::execute (function
*fun
)
2361 bool cfg_changed
= false;
2363 calculate_dominance_info (CDI_DOMINATORS
);
2365 FOR_EACH_BB_FN (bb
, fun
)
2367 gimple_stmt_iterator gsi
;
2368 bool cleanup_eh
= false;
2370 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2372 gimple
*stmt
= gsi_stmt (gsi
);
2374 /* Only the last stmt in a bb could throw, no need to call
2375 gimple_purge_dead_eh_edges if we change something in the middle
2376 of a basic block. */
2379 if (is_gimple_call (stmt
)
2380 && gimple_call_lhs (stmt
))
2382 tree arg0
, arg1
, result
;
2386 switch (gimple_call_combined_fn (stmt
))
2389 arg0
= gimple_call_arg (stmt
, 0);
2390 arg1
= gimple_call_arg (stmt
, 1);
2392 loc
= gimple_location (stmt
);
2393 result
= gimple_expand_builtin_pow (&gsi
, loc
, arg0
, arg1
);
2397 tree lhs
= gimple_get_lhs (stmt
);
2398 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2399 gimple_set_location (new_stmt
, loc
);
2400 unlink_stmt_vdef (stmt
);
2401 gsi_replace (&gsi
, new_stmt
, true);
2403 if (gimple_vdef (stmt
))
2404 release_ssa_name (gimple_vdef (stmt
));
2409 arg0
= gimple_call_arg (stmt
, 0);
2410 arg1
= gimple_call_arg (stmt
, 1);
2411 loc
= gimple_location (stmt
);
2413 if (real_minus_onep (arg0
))
2415 tree t0
, t1
, cond
, one
, minus_one
;
2418 t0
= TREE_TYPE (arg0
);
2419 t1
= TREE_TYPE (arg1
);
2420 one
= build_real (t0
, dconst1
);
2421 minus_one
= build_real (t0
, dconstm1
);
2423 cond
= make_temp_ssa_name (t1
, NULL
, "powi_cond");
2424 stmt
= gimple_build_assign (cond
, BIT_AND_EXPR
,
2425 arg1
, build_int_cst (t1
, 1));
2426 gimple_set_location (stmt
, loc
);
2427 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2429 result
= make_temp_ssa_name (t0
, NULL
, "powi");
2430 stmt
= gimple_build_assign (result
, COND_EXPR
, cond
,
2432 gimple_set_location (stmt
, loc
);
2433 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2437 if (!tree_fits_shwi_p (arg1
))
2440 n
= tree_to_shwi (arg1
);
2441 result
= gimple_expand_builtin_powi (&gsi
, loc
, arg0
, n
);
2446 tree lhs
= gimple_get_lhs (stmt
);
2447 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2448 gimple_set_location (new_stmt
, loc
);
2449 unlink_stmt_vdef (stmt
);
2450 gsi_replace (&gsi
, new_stmt
, true);
2452 if (gimple_vdef (stmt
))
2453 release_ssa_name (gimple_vdef (stmt
));
2458 arg0
= gimple_call_arg (stmt
, 0);
2459 loc
= gimple_location (stmt
);
2460 result
= gimple_expand_builtin_cabs (&gsi
, loc
, arg0
);
2464 tree lhs
= gimple_get_lhs (stmt
);
2465 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2466 gimple_set_location (new_stmt
, loc
);
2467 unlink_stmt_vdef (stmt
);
2468 gsi_replace (&gsi
, new_stmt
, true);
2470 if (gimple_vdef (stmt
))
2471 release_ssa_name (gimple_vdef (stmt
));
2480 cfg_changed
|= gimple_purge_dead_eh_edges (bb
);
2483 return cfg_changed
? TODO_cleanup_cfg
: 0;
2489 make_pass_expand_powcabs (gcc::context
*ctxt
)
2491 return new pass_expand_powcabs (ctxt
);
2494 /* Return true if stmt is a type conversion operation that can be stripped
2495 when used in a widening multiply operation. */
2497 widening_mult_conversion_strippable_p (tree result_type
, gimple
*stmt
)
2499 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
2501 if (TREE_CODE (result_type
) == INTEGER_TYPE
)
2506 if (!CONVERT_EXPR_CODE_P (rhs_code
))
2509 op_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2511 /* If the type of OP has the same precision as the result, then
2512 we can strip this conversion. The multiply operation will be
2513 selected to create the correct extension as a by-product. */
2514 if (TYPE_PRECISION (result_type
) == TYPE_PRECISION (op_type
))
2517 /* We can also strip a conversion if it preserves the signed-ness of
2518 the operation and doesn't narrow the range. */
2519 inner_op_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
2521 /* If the inner-most type is unsigned, then we can strip any
2522 intermediate widening operation. If it's signed, then the
2523 intermediate widening operation must also be signed. */
2524 if ((TYPE_UNSIGNED (inner_op_type
)
2525 || TYPE_UNSIGNED (op_type
) == TYPE_UNSIGNED (inner_op_type
))
2526 && TYPE_PRECISION (op_type
) > TYPE_PRECISION (inner_op_type
))
2532 return rhs_code
== FIXED_CONVERT_EXPR
;
2535 /* Return true if RHS is a suitable operand for a widening multiplication,
2536 assuming a target type of TYPE.
2537 There are two cases:
2539 - RHS makes some value at least twice as wide. Store that value
2540 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2542 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2543 but leave *TYPE_OUT untouched. */
2546 is_widening_mult_rhs_p (tree type
, tree rhs
, tree
*type_out
,
2552 if (TREE_CODE (rhs
) == SSA_NAME
)
2554 stmt
= SSA_NAME_DEF_STMT (rhs
);
2555 if (is_gimple_assign (stmt
))
2557 if (! widening_mult_conversion_strippable_p (type
, stmt
))
2561 rhs1
= gimple_assign_rhs1 (stmt
);
2563 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2565 *new_rhs_out
= rhs1
;
2574 type1
= TREE_TYPE (rhs1
);
2576 if (TREE_CODE (type1
) != TREE_CODE (type
)
2577 || TYPE_PRECISION (type1
) * 2 > TYPE_PRECISION (type
))
2580 *new_rhs_out
= rhs1
;
2585 if (TREE_CODE (rhs
) == INTEGER_CST
)
2595 /* Return true if STMT performs a widening multiplication, assuming the
2596 output type is TYPE. If so, store the unwidened types of the operands
2597 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2598 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2599 and *TYPE2_OUT would give the operands of the multiplication. */
2602 is_widening_mult_p (gimple
*stmt
,
2603 tree
*type1_out
, tree
*rhs1_out
,
2604 tree
*type2_out
, tree
*rhs2_out
)
2606 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2608 if (TREE_CODE (type
) == INTEGER_TYPE
)
2610 if (TYPE_OVERFLOW_TRAPS (type
))
2613 else if (TREE_CODE (type
) != FIXED_POINT_TYPE
)
2616 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs1 (stmt
), type1_out
,
2620 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs2 (stmt
), type2_out
,
2624 if (*type1_out
== NULL
)
2626 if (*type2_out
== NULL
|| !int_fits_type_p (*rhs1_out
, *type2_out
))
2628 *type1_out
= *type2_out
;
2631 if (*type2_out
== NULL
)
2633 if (!int_fits_type_p (*rhs2_out
, *type1_out
))
2635 *type2_out
= *type1_out
;
2638 /* Ensure that the larger of the two operands comes first. */
2639 if (TYPE_PRECISION (*type1_out
) < TYPE_PRECISION (*type2_out
))
2641 std::swap (*type1_out
, *type2_out
);
2642 std::swap (*rhs1_out
, *rhs2_out
);
2648 /* Check to see if the CALL statement is an invocation of copysign
2649 with 1. being the first argument. */
2651 is_copysign_call_with_1 (gimple
*call
)
2653 gcall
*c
= dyn_cast
<gcall
*> (call
);
2657 enum combined_fn code
= gimple_call_combined_fn (c
);
2659 if (code
== CFN_LAST
)
2662 if (builtin_fn_p (code
))
2664 switch (as_builtin_fn (code
))
2666 CASE_FLT_FN (BUILT_IN_COPYSIGN
):
2667 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN
):
2668 return real_onep (gimple_call_arg (c
, 0));
2674 if (internal_fn_p (code
))
2676 switch (as_internal_fn (code
))
2679 return real_onep (gimple_call_arg (c
, 0));
2688 /* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
2689 This only happens when the xorsign optab is defined, if the
2690 pattern is not a xorsign pattern or if expansion fails FALSE is
2691 returned, otherwise TRUE is returned. */
2693 convert_expand_mult_copysign (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2695 tree treeop0
, treeop1
, lhs
, type
;
2696 location_t loc
= gimple_location (stmt
);
2697 lhs
= gimple_assign_lhs (stmt
);
2698 treeop0
= gimple_assign_rhs1 (stmt
);
2699 treeop1
= gimple_assign_rhs2 (stmt
);
2700 type
= TREE_TYPE (lhs
);
2701 machine_mode mode
= TYPE_MODE (type
);
2703 if (HONOR_SNANS (type
))
2706 if (TREE_CODE (treeop0
) == SSA_NAME
&& TREE_CODE (treeop1
) == SSA_NAME
)
2708 gimple
*call0
= SSA_NAME_DEF_STMT (treeop0
);
2709 if (!has_single_use (treeop0
) || !is_copysign_call_with_1 (call0
))
2711 call0
= SSA_NAME_DEF_STMT (treeop1
);
2712 if (!has_single_use (treeop1
) || !is_copysign_call_with_1 (call0
))
2717 if (optab_handler (xorsign_optab
, mode
) == CODE_FOR_nothing
)
2720 gcall
*c
= as_a
<gcall
*> (call0
);
2721 treeop0
= gimple_call_arg (c
, 1);
2724 = gimple_build_call_internal (IFN_XORSIGN
, 2, treeop1
, treeop0
);
2725 gimple_set_lhs (call_stmt
, lhs
);
2726 gimple_set_location (call_stmt
, loc
);
2727 gsi_replace (gsi
, call_stmt
, true);
2734 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2735 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2736 value is true iff we converted the statement. */
2739 convert_mult_to_widen (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2741 tree lhs
, rhs1
, rhs2
, type
, type1
, type2
;
2742 enum insn_code handler
;
2743 scalar_int_mode to_mode
, from_mode
, actual_mode
;
2745 int actual_precision
;
2746 location_t loc
= gimple_location (stmt
);
2747 bool from_unsigned1
, from_unsigned2
;
2749 lhs
= gimple_assign_lhs (stmt
);
2750 type
= TREE_TYPE (lhs
);
2751 if (TREE_CODE (type
) != INTEGER_TYPE
)
2754 if (!is_widening_mult_p (stmt
, &type1
, &rhs1
, &type2
, &rhs2
))
2757 to_mode
= SCALAR_INT_TYPE_MODE (type
);
2758 from_mode
= SCALAR_INT_TYPE_MODE (type1
);
2759 if (to_mode
== from_mode
)
2762 from_unsigned1
= TYPE_UNSIGNED (type1
);
2763 from_unsigned2
= TYPE_UNSIGNED (type2
);
2765 if (from_unsigned1
&& from_unsigned2
)
2766 op
= umul_widen_optab
;
2767 else if (!from_unsigned1
&& !from_unsigned2
)
2768 op
= smul_widen_optab
;
2770 op
= usmul_widen_optab
;
2772 handler
= find_widening_optab_handler_and_mode (op
, to_mode
, from_mode
,
2775 if (handler
== CODE_FOR_nothing
)
2777 if (op
!= smul_widen_optab
)
2779 /* We can use a signed multiply with unsigned types as long as
2780 there is a wider mode to use, or it is the smaller of the two
2781 types that is unsigned. Note that type1 >= type2, always. */
2782 if ((TYPE_UNSIGNED (type1
)
2783 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2784 || (TYPE_UNSIGNED (type2
)
2785 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2787 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2788 || GET_MODE_SIZE (to_mode
) <= GET_MODE_SIZE (from_mode
))
2792 op
= smul_widen_optab
;
2793 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2797 if (handler
== CODE_FOR_nothing
)
2800 from_unsigned1
= from_unsigned2
= false;
2804 /* Expand can synthesize smul_widen_optab if the target
2805 supports umul_widen_optab. */
2806 op
= umul_widen_optab
;
2807 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2810 if (handler
== CODE_FOR_nothing
)
2815 /* Ensure that the inputs to the handler are in the correct precison
2816 for the opcode. This will be the full mode size. */
2817 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2818 if (2 * actual_precision
> TYPE_PRECISION (type
))
2820 if (actual_precision
!= TYPE_PRECISION (type1
)
2821 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2822 rhs1
= build_and_insert_cast (gsi
, loc
,
2823 build_nonstandard_integer_type
2824 (actual_precision
, from_unsigned1
), rhs1
);
2825 if (actual_precision
!= TYPE_PRECISION (type2
)
2826 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2827 rhs2
= build_and_insert_cast (gsi
, loc
,
2828 build_nonstandard_integer_type
2829 (actual_precision
, from_unsigned2
), rhs2
);
2831 /* Handle constants. */
2832 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2833 rhs1
= fold_convert (type1
, rhs1
);
2834 if (TREE_CODE (rhs2
) == INTEGER_CST
)
2835 rhs2
= fold_convert (type2
, rhs2
);
2837 gimple_assign_set_rhs1 (stmt
, rhs1
);
2838 gimple_assign_set_rhs2 (stmt
, rhs2
);
2839 gimple_assign_set_rhs_code (stmt
, WIDEN_MULT_EXPR
);
2841 widen_mul_stats
.widen_mults_inserted
++;
2845 /* Process a single gimple statement STMT, which is found at the
2846 iterator GSI and has a either a PLUS_EXPR or a MINUS_EXPR as its
2847 rhs (given by CODE), and try to convert it into a
2848 WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR. The return value
2849 is true iff we converted the statement. */
2852 convert_plusminus_to_widen (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
2853 enum tree_code code
)
2855 gimple
*rhs1_stmt
= NULL
, *rhs2_stmt
= NULL
;
2856 gimple
*conv1_stmt
= NULL
, *conv2_stmt
= NULL
, *conv_stmt
;
2857 tree type
, type1
, type2
, optype
;
2858 tree lhs
, rhs1
, rhs2
, mult_rhs1
, mult_rhs2
, add_rhs
;
2859 enum tree_code rhs1_code
= ERROR_MARK
, rhs2_code
= ERROR_MARK
;
2861 enum tree_code wmult_code
;
2862 enum insn_code handler
;
2863 scalar_mode to_mode
, from_mode
, actual_mode
;
2864 location_t loc
= gimple_location (stmt
);
2865 int actual_precision
;
2866 bool from_unsigned1
, from_unsigned2
;
2868 lhs
= gimple_assign_lhs (stmt
);
2869 type
= TREE_TYPE (lhs
);
2870 if (TREE_CODE (type
) != INTEGER_TYPE
2871 && TREE_CODE (type
) != FIXED_POINT_TYPE
)
2874 if (code
== MINUS_EXPR
)
2875 wmult_code
= WIDEN_MULT_MINUS_EXPR
;
2877 wmult_code
= WIDEN_MULT_PLUS_EXPR
;
2879 rhs1
= gimple_assign_rhs1 (stmt
);
2880 rhs2
= gimple_assign_rhs2 (stmt
);
2882 if (TREE_CODE (rhs1
) == SSA_NAME
)
2884 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2885 if (is_gimple_assign (rhs1_stmt
))
2886 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2889 if (TREE_CODE (rhs2
) == SSA_NAME
)
2891 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2892 if (is_gimple_assign (rhs2_stmt
))
2893 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
2896 /* Allow for one conversion statement between the multiply
2897 and addition/subtraction statement. If there are more than
2898 one conversions then we assume they would invalidate this
2899 transformation. If that's not the case then they should have
2900 been folded before now. */
2901 if (CONVERT_EXPR_CODE_P (rhs1_code
))
2903 conv1_stmt
= rhs1_stmt
;
2904 rhs1
= gimple_assign_rhs1 (rhs1_stmt
);
2905 if (TREE_CODE (rhs1
) == SSA_NAME
)
2907 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2908 if (is_gimple_assign (rhs1_stmt
))
2909 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2914 if (CONVERT_EXPR_CODE_P (rhs2_code
))
2916 conv2_stmt
= rhs2_stmt
;
2917 rhs2
= gimple_assign_rhs1 (rhs2_stmt
);
2918 if (TREE_CODE (rhs2
) == SSA_NAME
)
2920 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2921 if (is_gimple_assign (rhs2_stmt
))
2922 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
2928 /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
2929 is_widening_mult_p, but we still need the rhs returns.
2931 It might also appear that it would be sufficient to use the existing
2932 operands of the widening multiply, but that would limit the choice of
2933 multiply-and-accumulate instructions.
2935 If the widened-multiplication result has more than one uses, it is
2936 probably wiser not to do the conversion. Also restrict this operation
2937 to single basic block to avoid moving the multiply to a different block
2938 with a higher execution frequency. */
2939 if (code
== PLUS_EXPR
2940 && (rhs1_code
== MULT_EXPR
|| rhs1_code
== WIDEN_MULT_EXPR
))
2942 if (!has_single_use (rhs1
)
2943 || gimple_bb (rhs1_stmt
) != gimple_bb (stmt
)
2944 || !is_widening_mult_p (rhs1_stmt
, &type1
, &mult_rhs1
,
2945 &type2
, &mult_rhs2
))
2948 conv_stmt
= conv1_stmt
;
2950 else if (rhs2_code
== MULT_EXPR
|| rhs2_code
== WIDEN_MULT_EXPR
)
2952 if (!has_single_use (rhs2
)
2953 || gimple_bb (rhs2_stmt
) != gimple_bb (stmt
)
2954 || !is_widening_mult_p (rhs2_stmt
, &type1
, &mult_rhs1
,
2955 &type2
, &mult_rhs2
))
2958 conv_stmt
= conv2_stmt
;
2963 to_mode
= SCALAR_TYPE_MODE (type
);
2964 from_mode
= SCALAR_TYPE_MODE (type1
);
2965 if (to_mode
== from_mode
)
2968 from_unsigned1
= TYPE_UNSIGNED (type1
);
2969 from_unsigned2
= TYPE_UNSIGNED (type2
);
2972 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2973 if (from_unsigned1
!= from_unsigned2
)
2975 if (!INTEGRAL_TYPE_P (type
))
2977 /* We can use a signed multiply with unsigned types as long as
2978 there is a wider mode to use, or it is the smaller of the two
2979 types that is unsigned. Note that type1 >= type2, always. */
2981 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2983 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2985 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2986 || GET_MODE_SIZE (from_mode
) >= GET_MODE_SIZE (to_mode
))
2990 from_unsigned1
= from_unsigned2
= false;
2991 optype
= build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode
),
2995 /* If there was a conversion between the multiply and addition
2996 then we need to make sure it fits a multiply-and-accumulate.
2997 The should be a single mode change which does not change the
3001 /* We use the original, unmodified data types for this. */
3002 tree from_type
= TREE_TYPE (gimple_assign_rhs1 (conv_stmt
));
3003 tree to_type
= TREE_TYPE (gimple_assign_lhs (conv_stmt
));
3004 int data_size
= TYPE_PRECISION (type1
) + TYPE_PRECISION (type2
);
3005 bool is_unsigned
= TYPE_UNSIGNED (type1
) && TYPE_UNSIGNED (type2
);
3007 if (TYPE_PRECISION (from_type
) > TYPE_PRECISION (to_type
))
3009 /* Conversion is a truncate. */
3010 if (TYPE_PRECISION (to_type
) < data_size
)
3013 else if (TYPE_PRECISION (from_type
) < TYPE_PRECISION (to_type
))
3015 /* Conversion is an extend. Check it's the right sort. */
3016 if (TYPE_UNSIGNED (from_type
) != is_unsigned
3017 && !(is_unsigned
&& TYPE_PRECISION (from_type
) > data_size
))
3020 /* else convert is a no-op for our purposes. */
3023 /* Verify that the machine can perform a widening multiply
3024 accumulate in this mode/signedness combination, otherwise
3025 this transformation is likely to pessimize code. */
3026 this_optab
= optab_for_tree_code (wmult_code
, optype
, optab_default
);
3027 handler
= find_widening_optab_handler_and_mode (this_optab
, to_mode
,
3028 from_mode
, &actual_mode
);
3030 if (handler
== CODE_FOR_nothing
)
3033 /* Ensure that the inputs to the handler are in the correct precison
3034 for the opcode. This will be the full mode size. */
3035 actual_precision
= GET_MODE_PRECISION (actual_mode
);
3036 if (actual_precision
!= TYPE_PRECISION (type1
)
3037 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
3038 mult_rhs1
= build_and_insert_cast (gsi
, loc
,
3039 build_nonstandard_integer_type
3040 (actual_precision
, from_unsigned1
),
3042 if (actual_precision
!= TYPE_PRECISION (type2
)
3043 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
3044 mult_rhs2
= build_and_insert_cast (gsi
, loc
,
3045 build_nonstandard_integer_type
3046 (actual_precision
, from_unsigned2
),
3049 if (!useless_type_conversion_p (type
, TREE_TYPE (add_rhs
)))
3050 add_rhs
= build_and_insert_cast (gsi
, loc
, type
, add_rhs
);
3052 /* Handle constants. */
3053 if (TREE_CODE (mult_rhs1
) == INTEGER_CST
)
3054 mult_rhs1
= fold_convert (type1
, mult_rhs1
);
3055 if (TREE_CODE (mult_rhs2
) == INTEGER_CST
)
3056 mult_rhs2
= fold_convert (type2
, mult_rhs2
);
3058 gimple_assign_set_rhs_with_ops (gsi
, wmult_code
, mult_rhs1
, mult_rhs2
,
3060 update_stmt (gsi_stmt (*gsi
));
3061 widen_mul_stats
.maccs_inserted
++;
3065 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3066 OP2 and which we know is used in statements that can be, together with the
3067 multiplication, converted to FMAs, perform the transformation. */
3070 convert_mult_to_fma_1 (tree mul_result
, tree op1
, tree op2
)
3072 tree type
= TREE_TYPE (mul_result
);
3074 imm_use_iterator imm_iter
;
3077 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, mul_result
)
3079 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
3080 tree addop
, mulop1
= op1
, result
= mul_result
;
3081 bool negate_p
= false;
3082 gimple_seq seq
= NULL
;
3084 if (is_gimple_debug (use_stmt
))
3087 if (is_gimple_assign (use_stmt
)
3088 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3090 result
= gimple_assign_lhs (use_stmt
);
3091 use_operand_p use_p
;
3092 gimple
*neguse_stmt
;
3093 single_imm_use (gimple_assign_lhs (use_stmt
), &use_p
, &neguse_stmt
);
3094 gsi_remove (&gsi
, true);
3095 release_defs (use_stmt
);
3097 use_stmt
= neguse_stmt
;
3098 gsi
= gsi_for_stmt (use_stmt
);
3102 tree cond
, else_value
, ops
[3];
3104 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
,
3107 addop
= ops
[0] == result
? ops
[1] : ops
[0];
3109 if (code
== MINUS_EXPR
)
3111 if (ops
[0] == result
)
3112 /* a * b - c -> a * b + (-c) */
3113 addop
= gimple_build (&seq
, NEGATE_EXPR
, type
, addop
);
3115 /* a - b * c -> (-b) * c + a */
3116 negate_p
= !negate_p
;
3120 mulop1
= gimple_build (&seq
, NEGATE_EXPR
, type
, mulop1
);
3123 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
3126 fma_stmt
= gimple_build_call_internal (IFN_COND_FMA
, 5, cond
, mulop1
,
3127 op2
, addop
, else_value
);
3129 fma_stmt
= gimple_build_call_internal (IFN_FMA
, 3, mulop1
, op2
, addop
);
3130 gimple_set_lhs (fma_stmt
, gimple_get_lhs (use_stmt
));
3131 gimple_call_set_nothrow (fma_stmt
, !stmt_can_throw_internal (cfun
,
3133 gsi_replace (&gsi
, fma_stmt
, true);
3134 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3135 regardless of where the negation occurs. */
3136 gimple
*orig_stmt
= gsi_stmt (gsi
);
3137 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3139 if (maybe_clean_or_replace_eh_stmt (orig_stmt
, gsi_stmt (gsi
)))
3141 update_stmt (gsi_stmt (gsi
));
3144 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3146 fprintf (dump_file
, "Generated FMA ");
3147 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3148 fprintf (dump_file
, "\n");
3151 /* If the FMA result is negated in a single use, fold the negation
3153 orig_stmt
= gsi_stmt (gsi
);
3154 use_operand_p use_p
;
3156 if (is_gimple_call (orig_stmt
)
3157 && gimple_call_internal_p (orig_stmt
)
3158 && gimple_call_lhs (orig_stmt
)
3159 && TREE_CODE (gimple_call_lhs (orig_stmt
)) == SSA_NAME
3160 && single_imm_use (gimple_call_lhs (orig_stmt
), &use_p
, &neg_stmt
)
3161 && is_gimple_assign (neg_stmt
)
3162 && gimple_assign_rhs_code (neg_stmt
) == NEGATE_EXPR
3163 && !stmt_could_throw_p (cfun
, neg_stmt
))
3165 gsi
= gsi_for_stmt (neg_stmt
);
3166 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3168 if (maybe_clean_or_replace_eh_stmt (neg_stmt
, gsi_stmt (gsi
)))
3170 update_stmt (gsi_stmt (gsi
));
3171 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3173 fprintf (dump_file
, "Folded FMA negation ");
3174 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3175 fprintf (dump_file
, "\n");
3180 widen_mul_stats
.fmas_inserted
++;
3184 /* Data necessary to perform the actual transformation from a multiplication
3185 and an addition to an FMA after decision is taken it should be done and to
3186 then delete the multiplication statement from the function IL. */
3188 struct fma_transformation_info
3196 /* Structure containing the current state of FMA deferring, i.e. whether we are
3197 deferring, whether to continue deferring, and all data necessary to come
3198 back and perform all deferred transformations. */
3200 class fma_deferring_state
3203 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3204 do any deferring. */
3206 fma_deferring_state (bool perform_deferring
)
3207 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL
),
3208 m_last_result (NULL_TREE
), m_deferring_p (perform_deferring
) {}
3210 /* List of FMA candidates for which we the transformation has been determined
3211 possible but we at this point in BB analysis we do not consider them
3213 auto_vec
<fma_transformation_info
, 8> m_candidates
;
3215 /* Set of results of multiplication that are part of an already deferred FMA
3217 hash_set
<tree
> m_mul_result_set
;
3219 /* The PHI that supposedly feeds back result of a FMA to another over loop
3221 gphi
*m_initial_phi
;
3223 /* Result of the last produced FMA candidate or NULL if there has not been
3227 /* If true, deferring might still be profitable. If false, transform all
3228 candidates and no longer defer. */
3232 /* Transform all deferred FMA candidates and mark STATE as no longer
3236 cancel_fma_deferring (fma_deferring_state
*state
)
3238 if (!state
->m_deferring_p
)
3241 for (unsigned i
= 0; i
< state
->m_candidates
.length (); i
++)
3243 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3244 fprintf (dump_file
, "Generating deferred FMA\n");
3246 const fma_transformation_info
&fti
= state
->m_candidates
[i
];
3247 convert_mult_to_fma_1 (fti
.mul_result
, fti
.op1
, fti
.op2
);
3249 gimple_stmt_iterator gsi
= gsi_for_stmt (fti
.mul_stmt
);
3250 gsi_remove (&gsi
, true);
3251 release_defs (fti
.mul_stmt
);
3253 state
->m_deferring_p
= false;
3256 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3257 Otherwise return NULL. */
3260 result_of_phi (tree op
)
3262 if (TREE_CODE (op
) != SSA_NAME
)
3265 return dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (op
));
3268 /* After processing statements of a BB and recording STATE, return true if the
3269 initial phi is fed by the last FMA candidate result ore one such result from
3270 previously processed BBs marked in LAST_RESULT_SET. */
3273 last_fma_candidate_feeds_initial_phi (fma_deferring_state
*state
,
3274 hash_set
<tree
> *last_result_set
)
3278 FOR_EACH_PHI_ARG (use
, state
->m_initial_phi
, iter
, SSA_OP_USE
)
3280 tree t
= USE_FROM_PTR (use
);
3281 if (t
== state
->m_last_result
3282 || last_result_set
->contains (t
))
3289 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3290 with uses in additions and subtractions to form fused multiply-add
3291 operations. Returns true if successful and MUL_STMT should be removed.
3292 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3293 on MUL_COND, otherwise it is unconditional.
3295 If STATE indicates that we are deferring FMA transformation, that means
3296 that we do not produce FMAs for basic blocks which look like:
3299 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3301 accumulator_66 = _65 + accumulator_111;
3303 or its unrolled version, i.e. with several FMA candidates that feed result
3304 of one into the addend of another. Instead, we add them to a list in STATE
3305 and if we later discover an FMA candidate that is not part of such a chain,
3306 we go back and perform all deferred past candidates. */
3309 convert_mult_to_fma (gimple
*mul_stmt
, tree op1
, tree op2
,
3310 fma_deferring_state
*state
, tree mul_cond
= NULL_TREE
)
3312 tree mul_result
= gimple_get_lhs (mul_stmt
);
3313 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3314 if the statement was left just for the side-effects. */
3317 tree type
= TREE_TYPE (mul_result
);
3318 gimple
*use_stmt
, *neguse_stmt
;
3319 use_operand_p use_p
;
3320 imm_use_iterator imm_iter
;
3322 if (FLOAT_TYPE_P (type
)
3323 && flag_fp_contract_mode
!= FP_CONTRACT_FAST
)
3326 /* We don't want to do bitfield reduction ops. */
3327 if (INTEGRAL_TYPE_P (type
)
3328 && (!type_has_mode_precision_p (type
) || TYPE_OVERFLOW_TRAPS (type
)))
3331 /* If the target doesn't support it, don't generate it. We assume that
3332 if fma isn't available then fms, fnma or fnms are not either. */
3333 optimization_type opt_type
= bb_optimization_type (gimple_bb (mul_stmt
));
3334 if (!direct_internal_fn_supported_p (IFN_FMA
, type
, opt_type
))
3337 /* If the multiplication has zero uses, it is kept around probably because
3338 of -fnon-call-exceptions. Don't optimize it away in that case,
3340 if (has_zero_uses (mul_result
))
3344 = (state
->m_deferring_p
3345 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type
)),
3346 param_avoid_fma_max_bits
));
3347 bool defer
= check_defer
;
3348 bool seen_negate_p
= false;
3350 /* There is no numerical difference between fused and unfused integer FMAs,
3351 and the assumption below that FMA is as cheap as addition is unlikely
3352 to be true, especially if the multiplication occurs multiple times on
3353 the same chain. E.g., for something like:
3355 (((a * b) + c) >> 1) + (a * b)
3357 we do not want to duplicate the a * b into two additions, not least
3358 because the result is not a natural FMA chain. */
3359 if (ANY_INTEGRAL_TYPE_P (type
)
3360 && !has_single_use (mul_result
))
3363 /* Make sure that the multiplication statement becomes dead after
3364 the transformation, thus that all uses are transformed to FMAs.
3365 This means we assume that an FMA operation has the same cost
3367 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, mul_result
)
3369 tree result
= mul_result
;
3370 bool negate_p
= false;
3372 use_stmt
= USE_STMT (use_p
);
3374 if (is_gimple_debug (use_stmt
))
3377 /* For now restrict this operations to single basic blocks. In theory
3378 we would want to support sinking the multiplication in
3384 to form a fma in the then block and sink the multiplication to the
3386 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3389 /* A negate on the multiplication leads to FNMA. */
3390 if (is_gimple_assign (use_stmt
)
3391 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3396 /* If (due to earlier missed optimizations) we have two
3397 negates of the same value, treat them as equivalent
3398 to a single negate with multiple uses. */
3402 result
= gimple_assign_lhs (use_stmt
);
3404 /* Make sure the negate statement becomes dead with this
3405 single transformation. */
3406 if (!single_imm_use (gimple_assign_lhs (use_stmt
),
3407 &use_p
, &neguse_stmt
))
3410 /* Make sure the multiplication isn't also used on that stmt. */
3411 FOR_EACH_PHI_OR_STMT_USE (usep
, neguse_stmt
, iter
, SSA_OP_USE
)
3412 if (USE_FROM_PTR (usep
) == mul_result
)
3416 use_stmt
= neguse_stmt
;
3417 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3420 negate_p
= seen_negate_p
= true;
3423 tree cond
, else_value
, ops
[3];
3425 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
, ops
,
3432 if (ops
[1] == result
)
3433 negate_p
= !negate_p
;
3438 /* FMA can only be formed from PLUS and MINUS. */
3442 if (mul_cond
&& cond
!= mul_cond
)
3447 if (cond
== result
|| else_value
== result
)
3449 if (!direct_internal_fn_supported_p (IFN_COND_FMA
, type
, opt_type
))
3453 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3454 we'll visit later, we might be able to get a more profitable
3456 OTOH, if we don't, a negate / fma pair has likely lower latency
3457 that a mult / subtract pair. */
3458 if (code
== MINUS_EXPR
3461 && !direct_internal_fn_supported_p (IFN_FMS
, type
, opt_type
)
3462 && direct_internal_fn_supported_p (IFN_FNMA
, type
, opt_type
)
3463 && TREE_CODE (ops
[1]) == SSA_NAME
3464 && has_single_use (ops
[1]))
3466 gimple
*stmt2
= SSA_NAME_DEF_STMT (ops
[1]);
3467 if (is_gimple_assign (stmt2
)
3468 && gimple_assign_rhs_code (stmt2
) == MULT_EXPR
)
3472 /* We can't handle a * b + a * b. */
3473 if (ops
[0] == ops
[1])
3475 /* If deferring, make sure we are not looking at an instruction that
3476 wouldn't have existed if we were not. */
3477 if (state
->m_deferring_p
3478 && (state
->m_mul_result_set
.contains (ops
[0])
3479 || state
->m_mul_result_set
.contains (ops
[1])))
3484 tree use_lhs
= gimple_get_lhs (use_stmt
);
3485 if (state
->m_last_result
)
3487 if (ops
[1] == state
->m_last_result
3488 || ops
[0] == state
->m_last_result
)
3495 gcc_checking_assert (!state
->m_initial_phi
);
3497 if (ops
[0] == result
)
3498 phi
= result_of_phi (ops
[1]);
3501 gcc_assert (ops
[1] == result
);
3502 phi
= result_of_phi (ops
[0]);
3507 state
->m_initial_phi
= phi
;
3514 state
->m_last_result
= use_lhs
;
3515 check_defer
= false;
3520 /* While it is possible to validate whether or not the exact form that
3521 we've recognized is available in the backend, the assumption is that
3522 if the deferring logic above did not trigger, the transformation is
3523 never a loss. For instance, suppose the target only has the plain FMA
3524 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3525 MUL+SUB for FMA+NEG, which is still two operations. Consider
3526 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3527 form the two NEGs are independent and could be run in parallel. */
3532 fma_transformation_info fti
;
3533 fti
.mul_stmt
= mul_stmt
;
3534 fti
.mul_result
= mul_result
;
3537 state
->m_candidates
.safe_push (fti
);
3538 state
->m_mul_result_set
.add (mul_result
);
3540 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3542 fprintf (dump_file
, "Deferred generating FMA for multiplication ");
3543 print_gimple_stmt (dump_file
, mul_stmt
, 0, TDF_NONE
);
3544 fprintf (dump_file
, "\n");
3551 if (state
->m_deferring_p
)
3552 cancel_fma_deferring (state
);
3553 convert_mult_to_fma_1 (mul_result
, op1
, op2
);
3559 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3560 a check for non-zero like:
3561 _1 = x_4(D) * y_5(D);
3564 goto <bb 3>; [50.00%]
3566 goto <bb 4>; [50.00%]
3568 <bb 3> [local count: 536870913]:
3573 <bb 4> [local count: 1073741824]:
3574 # iftmp.0_3 = PHI <_10(3), 0(2)>
3575 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3576 optimize the x_4(D) != 0 condition to 1. */
3579 maybe_optimize_guarding_check (vec
<gimple
*> &mul_stmts
, gimple
*cond_stmt
,
3580 gimple
*div_stmt
, bool *cfg_changed
)
3582 basic_block bb
= gimple_bb (cond_stmt
);
3583 if (gimple_bb (div_stmt
) != bb
|| !single_pred_p (bb
))
3585 edge pred_edge
= single_pred_edge (bb
);
3586 basic_block pred_bb
= pred_edge
->src
;
3587 if (EDGE_COUNT (pred_bb
->succs
) != 2)
3589 edge other_edge
= EDGE_SUCC (pred_bb
, EDGE_SUCC (pred_bb
, 0) == pred_edge
);
3590 edge other_succ_edge
= NULL
;
3591 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3593 if (EDGE_COUNT (bb
->succs
) != 2)
3595 other_succ_edge
= EDGE_SUCC (bb
, 0);
3596 if (gimple_cond_code (cond_stmt
) == NE_EXPR
)
3598 if (other_succ_edge
->flags
& EDGE_TRUE_VALUE
)
3599 other_succ_edge
= EDGE_SUCC (bb
, 1);
3601 else if (other_succ_edge
->flags
& EDGE_FALSE_VALUE
)
3602 other_succ_edge
= EDGE_SUCC (bb
, 0);
3603 if (other_edge
->dest
!= other_succ_edge
->dest
)
3606 else if (!single_succ_p (bb
) || other_edge
->dest
!= single_succ (bb
))
3608 gcond
*zero_cond
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (pred_bb
));
3609 if (zero_cond
== NULL
3610 || (gimple_cond_code (zero_cond
)
3611 != ((pred_edge
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
))
3612 || !integer_zerop (gimple_cond_rhs (zero_cond
)))
3614 tree zero_cond_lhs
= gimple_cond_lhs (zero_cond
);
3615 if (TREE_CODE (zero_cond_lhs
) != SSA_NAME
)
3617 if (gimple_assign_rhs2 (div_stmt
) != zero_cond_lhs
)
3619 /* Allow the divisor to be result of a same precision cast
3620 from zero_cond_lhs. */
3621 tree rhs2
= gimple_assign_rhs2 (div_stmt
);
3622 if (TREE_CODE (rhs2
) != SSA_NAME
)
3624 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3625 if (!gimple_assign_cast_p (g
)
3626 || gimple_assign_rhs1 (g
) != gimple_cond_lhs (zero_cond
)
3627 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs
))
3628 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs
))
3629 != TYPE_PRECISION (TREE_TYPE (rhs2
))))
3632 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
3633 mul_stmts
.quick_push (div_stmt
);
3634 if (is_gimple_debug (gsi_stmt (gsi
)))
3635 gsi_next_nondebug (&gsi
);
3636 unsigned cast_count
= 0;
3637 while (gsi_stmt (gsi
) != cond_stmt
)
3639 /* If original mul_stmt has a single use, allow it in the same bb,
3640 we are looking then just at __builtin_mul_overflow_p.
3641 Though, in that case the original mul_stmt will be replaced
3642 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts. */
3646 FOR_EACH_VEC_ELT (mul_stmts
, i
, mul_stmt
)
3648 if (gsi_stmt (gsi
) == mul_stmt
)
3654 if (!ok
&& gimple_assign_cast_p (gsi_stmt (gsi
)) && ++cast_count
< 4)
3658 gsi_next_nondebug (&gsi
);
3660 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3662 basic_block succ_bb
= other_edge
->dest
;
3663 for (gphi_iterator gpi
= gsi_start_phis (succ_bb
); !gsi_end_p (gpi
);
3666 gphi
*phi
= gpi
.phi ();
3667 tree v1
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3668 tree v2
= gimple_phi_arg_def (phi
, other_succ_edge
->dest_idx
);
3669 if (!operand_equal_p (v1
, v2
, 0))
3675 tree lhs
= gimple_assign_lhs (cond_stmt
);
3676 if (!lhs
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))
3678 gsi_next_nondebug (&gsi
);
3679 if (!gsi_end_p (gsi
))
3681 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3683 gimple
*cast_stmt
= gsi_stmt (gsi
);
3684 if (!gimple_assign_cast_p (cast_stmt
))
3686 tree new_lhs
= gimple_assign_lhs (cast_stmt
);
3687 gsi_next_nondebug (&gsi
);
3688 if (!gsi_end_p (gsi
)
3690 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs
))
3691 || TYPE_PRECISION (TREE_TYPE (new_lhs
)) <= 1)
3695 edge succ_edge
= single_succ_edge (bb
);
3696 basic_block succ_bb
= succ_edge
->dest
;
3697 gsi
= gsi_start_phis (succ_bb
);
3698 if (gsi_end_p (gsi
))
3700 gphi
*phi
= as_a
<gphi
*> (gsi_stmt (gsi
));
3702 if (!gsi_end_p (gsi
))
3704 if (gimple_phi_arg_def (phi
, succ_edge
->dest_idx
) != lhs
)
3706 tree other_val
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3707 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3709 tree cond
= gimple_assign_rhs1 (cond_stmt
);
3710 if (TREE_CODE (cond
) == NE_EXPR
)
3712 if (!operand_equal_p (other_val
,
3713 gimple_assign_rhs3 (cond_stmt
), 0))
3716 else if (!operand_equal_p (other_val
,
3717 gimple_assign_rhs2 (cond_stmt
), 0))
3720 else if (gimple_assign_rhs_code (cond_stmt
) == NE_EXPR
)
3722 if (!integer_zerop (other_val
))
3725 else if (!integer_onep (other_val
))
3728 if (pred_edge
->flags
& EDGE_TRUE_VALUE
)
3729 gimple_cond_make_true (zero_cond
);
3731 gimple_cond_make_false (zero_cond
);
3732 update_stmt (zero_cond
);
3733 *cfg_changed
= true;
3736 /* Helper function for arith_overflow_check_p. Return true
3737 if VAL1 is equal to VAL2 cast to corresponding integral type
3738 with other signedness or vice versa. */
3741 arith_cast_equal_p (tree val1
, tree val2
)
3743 if (TREE_CODE (val1
) == INTEGER_CST
&& TREE_CODE (val2
) == INTEGER_CST
)
3744 return wi::eq_p (wi::to_wide (val1
), wi::to_wide (val2
));
3745 else if (TREE_CODE (val1
) != SSA_NAME
|| TREE_CODE (val2
) != SSA_NAME
)
3747 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1
))
3748 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1
)) == val2
)
3750 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2
))
3751 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2
)) == val1
)
3756 /* Helper function of match_arith_overflow. Return 1
3757 if USE_STMT is unsigned overflow check ovf != 0 for
3758 STMT, -1 if USE_STMT is unsigned overflow check ovf == 0
3762 arith_overflow_check_p (gimple
*stmt
, gimple
*cast_stmt
, gimple
*&use_stmt
,
3763 tree maxval
, tree
*other
)
3765 enum tree_code ccode
= ERROR_MARK
;
3766 tree crhs1
= NULL_TREE
, crhs2
= NULL_TREE
;
3767 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3768 tree lhs
= gimple_assign_lhs (cast_stmt
? cast_stmt
: stmt
);
3769 tree rhs1
= gimple_assign_rhs1 (stmt
);
3770 tree rhs2
= gimple_assign_rhs2 (stmt
);
3771 tree multop
= NULL_TREE
, divlhs
= NULL_TREE
;
3772 gimple
*cur_use_stmt
= use_stmt
;
3774 if (code
== MULT_EXPR
)
3776 if (!is_gimple_assign (use_stmt
))
3778 if (gimple_assign_rhs_code (use_stmt
) != TRUNC_DIV_EXPR
)
3780 if (gimple_assign_rhs1 (use_stmt
) != lhs
)
3784 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs1
))
3786 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
))
3791 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
3793 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
, 0))
3797 if (stmt_ends_bb_p (use_stmt
))
3799 divlhs
= gimple_assign_lhs (use_stmt
);
3803 if (!single_imm_use (divlhs
, &use
, &cur_use_stmt
))
3806 if (gimple_code (cur_use_stmt
) == GIMPLE_COND
)
3808 ccode
= gimple_cond_code (cur_use_stmt
);
3809 crhs1
= gimple_cond_lhs (cur_use_stmt
);
3810 crhs2
= gimple_cond_rhs (cur_use_stmt
);
3812 else if (is_gimple_assign (cur_use_stmt
))
3814 if (gimple_assign_rhs_class (cur_use_stmt
) == GIMPLE_BINARY_RHS
)
3816 ccode
= gimple_assign_rhs_code (cur_use_stmt
);
3817 crhs1
= gimple_assign_rhs1 (cur_use_stmt
);
3818 crhs2
= gimple_assign_rhs2 (cur_use_stmt
);
3820 else if (gimple_assign_rhs_code (cur_use_stmt
) == COND_EXPR
)
3822 tree cond
= gimple_assign_rhs1 (cur_use_stmt
);
3823 if (COMPARISON_CLASS_P (cond
))
3825 ccode
= TREE_CODE (cond
);
3826 crhs1
= TREE_OPERAND (cond
, 0);
3827 crhs2
= TREE_OPERAND (cond
, 1);
3838 if (TREE_CODE_CLASS (ccode
) != tcc_comparison
)
3847 /* r = a + b; r > maxval or r <= maxval */
3849 && TREE_CODE (crhs2
) == INTEGER_CST
3850 && tree_int_cst_equal (crhs2
, maxval
))
3851 return ccode
== GT_EXPR
? 1 : -1;
3854 /* r = a - b; r > a or r <= a
3855 r = a + b; a > r or a <= r or b > r or b <= r. */
3856 if ((code
== MINUS_EXPR
&& crhs1
== lhs
&& crhs2
== rhs1
)
3857 || (code
== PLUS_EXPR
&& (crhs1
== rhs1
|| crhs1
== rhs2
)
3859 return ccode
== GT_EXPR
? 1 : -1;
3860 /* r = ~a; b > r or b <= r. */
3861 if (code
== BIT_NOT_EXPR
&& crhs2
== lhs
)
3865 return ccode
== GT_EXPR
? 1 : -1;
3872 /* r = a - b; a < r or a >= r
3873 r = a + b; r < a or r >= a or r < b or r >= b. */
3874 if ((code
== MINUS_EXPR
&& crhs1
== rhs1
&& crhs2
== lhs
)
3875 || (code
== PLUS_EXPR
&& crhs1
== lhs
3876 && (crhs2
== rhs1
|| crhs2
== rhs2
)))
3877 return ccode
== LT_EXPR
? 1 : -1;
3878 /* r = ~a; r < b or r >= b. */
3879 if (code
== BIT_NOT_EXPR
&& crhs1
== lhs
)
3883 return ccode
== LT_EXPR
? 1 : -1;
3888 /* r = a * b; _1 = r / a; _1 == b
3889 r = a * b; _1 = r / b; _1 == a
3890 r = a * b; _1 = r / a; _1 != b
3891 r = a * b; _1 = r / b; _1 != a. */
3892 if (code
== MULT_EXPR
)
3896 if ((crhs1
== divlhs
&& arith_cast_equal_p (crhs2
, multop
))
3897 || (crhs2
== divlhs
&& arith_cast_equal_p (crhs1
, multop
)))
3899 use_stmt
= cur_use_stmt
;
3900 return ccode
== NE_EXPR
? 1 : -1;
3903 else if ((crhs1
== divlhs
&& operand_equal_p (crhs2
, multop
, 0))
3904 || (crhs2
== divlhs
&& crhs1
== multop
))
3906 use_stmt
= cur_use_stmt
;
3907 return ccode
== NE_EXPR
? 1 : -1;
3917 /* Recognize for unsigned x
3920 where there are other uses of x and replace it with
3921 _7 = .SUB_OVERFLOW (y, z);
3922 x = REALPART_EXPR <_7>;
3923 _8 = IMAGPART_EXPR <_7>;
3925 and similarly for addition.
3932 where y and z have unsigned types with maximum max
3933 and there are other uses of x and all of those cast x
3934 back to that unsigned type and again replace it with
3935 _7 = .ADD_OVERFLOW (y, z);
3936 _9 = REALPART_EXPR <_7>;
3937 _8 = IMAGPART_EXPR <_7>;
3939 and replace (utype) x with _9.
3945 _7 = .ADD_OVERFLOW (y, z);
3946 _8 = IMAGPART_EXPR <_7>;
3952 goto <bb 3>; [50.00%]
3954 goto <bb 4>; [50.00%]
3956 <bb 3> [local count: 536870913]:
3961 <bb 4> [local count: 1073741824]:
3962 # iftmp.0_3 = PHI <_10(3), 0(2)>
3964 _7 = .MUL_OVERFLOW (x, y);
3965 z = IMAGPART_EXPR <_7>;
3966 _8 = IMAGPART_EXPR <_7>;
3968 iftmp.0_3 = (int) _9; */
3971 match_arith_overflow (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
3972 enum tree_code code
, bool *cfg_changed
)
3974 tree lhs
= gimple_assign_lhs (stmt
);
3975 tree type
= TREE_TYPE (lhs
);
3976 use_operand_p use_p
;
3977 imm_use_iterator iter
;
3978 bool use_seen
= false;
3979 bool ovf_use_seen
= false;
3981 gimple
*add_stmt
= NULL
;
3982 bool add_first
= false;
3983 gimple
*cond_stmt
= NULL
;
3984 gimple
*cast_stmt
= NULL
;
3985 tree cast_lhs
= NULL_TREE
;
3987 gcc_checking_assert (code
== PLUS_EXPR
3988 || code
== MINUS_EXPR
3989 || code
== MULT_EXPR
3990 || code
== BIT_NOT_EXPR
);
3991 if (!INTEGRAL_TYPE_P (type
)
3992 || !TYPE_UNSIGNED (type
)
3993 || has_zero_uses (lhs
)
3994 || (code
!= PLUS_EXPR
3995 && code
!= MULT_EXPR
3996 && optab_handler (code
== MINUS_EXPR
? usubv4_optab
: uaddv4_optab
,
3997 TYPE_MODE (type
)) == CODE_FOR_nothing
))
4000 tree rhs1
= gimple_assign_rhs1 (stmt
);
4001 tree rhs2
= gimple_assign_rhs2 (stmt
);
4002 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
4004 use_stmt
= USE_STMT (use_p
);
4005 if (is_gimple_debug (use_stmt
))
4008 tree other
= NULL_TREE
;
4009 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, NULL_TREE
, &other
))
4011 if (code
== BIT_NOT_EXPR
)
4014 if (TREE_CODE (other
) != SSA_NAME
)
4020 cond_stmt
= use_stmt
;
4022 ovf_use_seen
= true;
4027 if (code
== MULT_EXPR
4028 && cast_stmt
== NULL
4029 && gimple_assign_cast_p (use_stmt
))
4031 cast_lhs
= gimple_assign_lhs (use_stmt
);
4032 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs
))
4033 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs
))
4034 && (TYPE_PRECISION (TREE_TYPE (cast_lhs
))
4035 == TYPE_PRECISION (TREE_TYPE (lhs
))))
4036 cast_stmt
= use_stmt
;
4038 cast_lhs
= NULL_TREE
;
4041 if (ovf_use_seen
&& use_seen
)
4046 && code
== MULT_EXPR
4049 if (TREE_CODE (rhs1
) != SSA_NAME
4050 || (TREE_CODE (rhs2
) != SSA_NAME
&& TREE_CODE (rhs2
) != INTEGER_CST
))
4052 FOR_EACH_IMM_USE_FAST (use_p
, iter
, cast_lhs
)
4054 use_stmt
= USE_STMT (use_p
);
4055 if (is_gimple_debug (use_stmt
))
4058 if (arith_overflow_check_p (stmt
, cast_stmt
, use_stmt
,
4060 ovf_use_seen
= true;
4066 cast_lhs
= NULL_TREE
;
4069 tree maxval
= NULL_TREE
;
4071 || (code
!= MULT_EXPR
&& (code
== BIT_NOT_EXPR
? use_seen
: !use_seen
))
4072 || (code
== PLUS_EXPR
4073 && optab_handler (uaddv4_optab
,
4074 TYPE_MODE (type
)) == CODE_FOR_nothing
)
4075 || (code
== MULT_EXPR
4076 && optab_handler (cast_stmt
? mulv4_optab
: umulv4_optab
,
4077 TYPE_MODE (type
)) == CODE_FOR_nothing
))
4079 if (code
!= PLUS_EXPR
)
4081 if (TREE_CODE (rhs1
) != SSA_NAME
4082 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1
)))
4084 rhs1
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1
));
4085 tree type1
= TREE_TYPE (rhs1
);
4086 if (!INTEGRAL_TYPE_P (type1
)
4087 || !TYPE_UNSIGNED (type1
)
4088 || TYPE_PRECISION (type1
) >= TYPE_PRECISION (type
)
4089 || (TYPE_PRECISION (type1
)
4090 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1
))))
4092 if (TREE_CODE (rhs2
) == INTEGER_CST
)
4094 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2
),
4095 TYPE_PRECISION (type1
),
4098 rhs2
= fold_convert (type1
, rhs2
);
4102 if (TREE_CODE (rhs2
) != SSA_NAME
4103 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2
)))
4105 rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2
));
4106 tree type2
= TREE_TYPE (rhs2
);
4107 if (!INTEGRAL_TYPE_P (type2
)
4108 || !TYPE_UNSIGNED (type2
)
4109 || TYPE_PRECISION (type2
) >= TYPE_PRECISION (type
)
4110 || (TYPE_PRECISION (type2
)
4111 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2
))))
4114 if (TYPE_PRECISION (type1
) >= TYPE_PRECISION (TREE_TYPE (rhs2
)))
4117 type
= TREE_TYPE (rhs2
);
4119 if (TREE_CODE (type
) != INTEGER_TYPE
4120 || optab_handler (uaddv4_optab
,
4121 TYPE_MODE (type
)) == CODE_FOR_nothing
)
4124 maxval
= wide_int_to_tree (type
, wi::max_value (TYPE_PRECISION (type
),
4126 ovf_use_seen
= false;
4128 basic_block use_bb
= NULL
;
4129 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
4131 use_stmt
= USE_STMT (use_p
);
4132 if (is_gimple_debug (use_stmt
))
4135 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, maxval
, NULL
))
4137 ovf_use_seen
= true;
4138 use_bb
= gimple_bb (use_stmt
);
4142 if (!gimple_assign_cast_p (use_stmt
)
4143 || gimple_assign_rhs_code (use_stmt
) == VIEW_CONVERT_EXPR
)
4145 tree use_lhs
= gimple_assign_lhs (use_stmt
);
4146 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs
))
4147 || (TYPE_PRECISION (TREE_TYPE (use_lhs
))
4148 > TYPE_PRECISION (type
)))
4155 if (!useless_type_conversion_p (type
, TREE_TYPE (rhs1
)))
4159 tree new_rhs1
= make_ssa_name (type
);
4160 gimple
*g
= gimple_build_assign (new_rhs1
, NOP_EXPR
, rhs1
);
4161 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4164 else if (!useless_type_conversion_p (type
, TREE_TYPE (rhs2
)))
4168 tree new_rhs2
= make_ssa_name (type
);
4169 gimple
*g
= gimple_build_assign (new_rhs2
, NOP_EXPR
, rhs2
);
4170 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4175 /* If there are no uses of the wider addition, check if
4176 forwprop has not created a narrower addition.
4177 Require it to be in the same bb as the overflow check. */
4178 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs1
)
4180 use_stmt
= USE_STMT (use_p
);
4181 if (is_gimple_debug (use_stmt
))
4184 if (use_stmt
== stmt
)
4187 if (!is_gimple_assign (use_stmt
)
4188 || gimple_bb (use_stmt
) != use_bb
4189 || gimple_assign_rhs_code (use_stmt
) != PLUS_EXPR
)
4192 if (gimple_assign_rhs1 (use_stmt
) == rhs1
)
4194 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt
),
4198 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
4200 if (gimple_assign_rhs1 (use_stmt
) != rhs2
)
4206 add_stmt
= use_stmt
;
4209 if (add_stmt
== NULL
)
4212 /* If stmt and add_stmt are in the same bb, we need to find out
4213 which one is earlier. If they are in different bbs, we've
4214 checked add_stmt is in the same bb as one of the uses of the
4215 stmt lhs, so stmt needs to dominate add_stmt too. */
4216 if (gimple_bb (stmt
) == gimple_bb (add_stmt
))
4218 gimple_stmt_iterator gsif
= *gsi
;
4219 gimple_stmt_iterator gsib
= *gsi
;
4221 /* Search both forward and backward from stmt and have a small
4223 for (i
= 0; i
< 128; i
++)
4225 if (!gsi_end_p (gsib
))
4227 gsi_prev_nondebug (&gsib
);
4228 if (gsi_stmt (gsib
) == add_stmt
)
4234 else if (gsi_end_p (gsif
))
4236 if (!gsi_end_p (gsif
))
4238 gsi_next_nondebug (&gsif
);
4239 if (gsi_stmt (gsif
) == add_stmt
)
4246 *gsi
= gsi_for_stmt (add_stmt
);
4251 if (code
== BIT_NOT_EXPR
)
4252 *gsi
= gsi_for_stmt (cond_stmt
);
4254 auto_vec
<gimple
*, 8> mul_stmts
;
4255 if (code
== MULT_EXPR
&& cast_stmt
)
4257 type
= TREE_TYPE (cast_lhs
);
4258 gimple
*g
= SSA_NAME_DEF_STMT (rhs1
);
4259 if (gimple_assign_cast_p (g
)
4260 && useless_type_conversion_p (type
,
4261 TREE_TYPE (gimple_assign_rhs1 (g
)))
4262 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
4263 rhs1
= gimple_assign_rhs1 (g
);
4266 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, rhs1
);
4267 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4268 rhs1
= gimple_assign_lhs (g
);
4269 mul_stmts
.quick_push (g
);
4271 if (TREE_CODE (rhs2
) == INTEGER_CST
)
4272 rhs2
= fold_convert (type
, rhs2
);
4275 g
= SSA_NAME_DEF_STMT (rhs2
);
4276 if (gimple_assign_cast_p (g
)
4277 && useless_type_conversion_p (type
,
4278 TREE_TYPE (gimple_assign_rhs1 (g
)))
4279 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
4280 rhs2
= gimple_assign_rhs1 (g
);
4283 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, rhs2
);
4284 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4285 rhs2
= gimple_assign_lhs (g
);
4286 mul_stmts
.quick_push (g
);
4290 tree ctype
= build_complex_type (type
);
4291 gcall
*g
= gimple_build_call_internal (code
== MULT_EXPR
4293 : code
!= MINUS_EXPR
4294 ? IFN_ADD_OVERFLOW
: IFN_SUB_OVERFLOW
,
4296 tree ctmp
= make_ssa_name (ctype
);
4297 gimple_call_set_lhs (g
, ctmp
);
4298 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4299 tree new_lhs
= (maxval
|| cast_stmt
) ? make_ssa_name (type
) : lhs
;
4301 if (code
!= BIT_NOT_EXPR
)
4303 g2
= gimple_build_assign (new_lhs
, REALPART_EXPR
,
4304 build1 (REALPART_EXPR
, type
, ctmp
));
4305 if (maxval
|| cast_stmt
)
4307 gsi_insert_before (gsi
, g2
, GSI_SAME_STMT
);
4309 *gsi
= gsi_for_stmt (stmt
);
4312 gsi_replace (gsi
, g2
, true);
4313 if (code
== MULT_EXPR
)
4315 mul_stmts
.quick_push (g
);
4316 mul_stmts
.quick_push (g2
);
4319 g2
= gimple_build_assign (lhs
, NOP_EXPR
, new_lhs
);
4320 gsi_replace (gsi
, g2
, true);
4321 mul_stmts
.quick_push (g2
);
4325 tree ovf
= make_ssa_name (type
);
4326 g2
= gimple_build_assign (ovf
, IMAGPART_EXPR
,
4327 build1 (IMAGPART_EXPR
, type
, ctmp
));
4328 if (code
!= BIT_NOT_EXPR
)
4329 gsi_insert_after (gsi
, g2
, GSI_NEW_STMT
);
4331 gsi_insert_before (gsi
, g2
, GSI_SAME_STMT
);
4332 if (code
== MULT_EXPR
)
4333 mul_stmts
.quick_push (g2
);
4335 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, cast_lhs
? cast_lhs
: lhs
)
4337 if (is_gimple_debug (use_stmt
))
4340 gimple
*orig_use_stmt
= use_stmt
;
4341 int ovf_use
= arith_overflow_check_p (stmt
, cast_stmt
, use_stmt
,
4345 gcc_assert (code
!= BIT_NOT_EXPR
);
4348 tree use_lhs
= gimple_assign_lhs (use_stmt
);
4349 gimple_assign_set_rhs1 (use_stmt
, new_lhs
);
4350 if (useless_type_conversion_p (TREE_TYPE (use_lhs
),
4351 TREE_TYPE (new_lhs
)))
4352 gimple_assign_set_rhs_code (use_stmt
, SSA_NAME
);
4353 update_stmt (use_stmt
);
4357 if (gimple_code (use_stmt
) == GIMPLE_COND
)
4359 gcond
*cond_stmt
= as_a
<gcond
*> (use_stmt
);
4360 gimple_cond_set_lhs (cond_stmt
, ovf
);
4361 gimple_cond_set_rhs (cond_stmt
, build_int_cst (type
, 0));
4362 gimple_cond_set_code (cond_stmt
, ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
);
4366 gcc_checking_assert (is_gimple_assign (use_stmt
));
4367 if (gimple_assign_rhs_class (use_stmt
) == GIMPLE_BINARY_RHS
)
4369 gimple_assign_set_rhs1 (use_stmt
, ovf
);
4370 gimple_assign_set_rhs2 (use_stmt
, build_int_cst (type
, 0));
4371 gimple_assign_set_rhs_code (use_stmt
,
4372 ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
);
4376 gcc_checking_assert (gimple_assign_rhs_code (use_stmt
)
4378 tree cond
= build2 (ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
,
4379 boolean_type_node
, ovf
,
4380 build_int_cst (type
, 0));
4381 gimple_assign_set_rhs1 (use_stmt
, cond
);
4384 update_stmt (use_stmt
);
4385 if (code
== MULT_EXPR
&& use_stmt
!= orig_use_stmt
)
4387 gimple_stmt_iterator gsi2
= gsi_for_stmt (orig_use_stmt
);
4388 maybe_optimize_guarding_check (mul_stmts
, use_stmt
, orig_use_stmt
,
4390 gsi_remove (&gsi2
, true);
4391 release_ssa_name (gimple_assign_lhs (orig_use_stmt
));
4396 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
4397 gsi_remove (&gsi2
, true);
4400 gimple
*g
= gimple_build_assign (gimple_assign_lhs (add_stmt
),
4402 gsi2
= gsi_for_stmt (add_stmt
);
4403 gsi_replace (&gsi2
, g
, true);
4406 else if (code
== BIT_NOT_EXPR
)
4408 *gsi
= gsi_for_stmt (stmt
);
4409 gsi_remove (gsi
, true);
4410 release_ssa_name (lhs
);
4416 /* Return true if target has support for divmod. */
4419 target_supports_divmod_p (optab divmod_optab
, optab div_optab
, machine_mode mode
)
4421 /* If target supports hardware divmod insn, use it for divmod. */
4422 if (optab_handler (divmod_optab
, mode
) != CODE_FOR_nothing
)
4425 /* Check if libfunc for divmod is available. */
4426 rtx libfunc
= optab_libfunc (divmod_optab
, mode
);
4427 if (libfunc
!= NULL_RTX
)
4429 /* If optab_handler exists for div_optab, perhaps in a wider mode,
4430 we don't want to use the libfunc even if it exists for given mode. */
4431 machine_mode div_mode
;
4432 FOR_EACH_MODE_FROM (div_mode
, mode
)
4433 if (optab_handler (div_optab
, div_mode
) != CODE_FOR_nothing
)
4436 return targetm
.expand_divmod_libfunc
!= NULL
;
4442 /* Check if stmt is candidate for divmod transform. */
4445 divmod_candidate_p (gassign
*stmt
)
4447 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
4448 machine_mode mode
= TYPE_MODE (type
);
4449 optab divmod_optab
, div_optab
;
4451 if (TYPE_UNSIGNED (type
))
4453 divmod_optab
= udivmod_optab
;
4454 div_optab
= udiv_optab
;
4458 divmod_optab
= sdivmod_optab
;
4459 div_optab
= sdiv_optab
;
4462 tree op1
= gimple_assign_rhs1 (stmt
);
4463 tree op2
= gimple_assign_rhs2 (stmt
);
4465 /* Disable the transform if either is a constant, since division-by-constant
4466 may have specialized expansion. */
4467 if (CONSTANT_CLASS_P (op1
))
4470 if (CONSTANT_CLASS_P (op2
))
4472 if (integer_pow2p (op2
))
4475 if (TYPE_PRECISION (type
) <= HOST_BITS_PER_WIDE_INT
4476 && TYPE_PRECISION (type
) <= BITS_PER_WORD
)
4479 /* If the divisor is not power of 2 and the precision wider than
4480 HWI, expand_divmod punts on that, so in that case it is better
4481 to use divmod optab or libfunc. Similarly if choose_multiplier
4482 might need pre/post shifts of BITS_PER_WORD or more. */
4485 /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
4486 expand using the [su]divv optabs. */
4487 if (TYPE_OVERFLOW_TRAPS (type
))
4490 if (!target_supports_divmod_p (divmod_optab
, div_optab
, mode
))
4496 /* This function looks for:
4497 t1 = a TRUNC_DIV_EXPR b;
4498 t2 = a TRUNC_MOD_EXPR b;
4499 and transforms it to the following sequence:
4500 complex_tmp = DIVMOD (a, b);
4501 t1 = REALPART_EXPR(a);
4502 t2 = IMAGPART_EXPR(b);
4503 For conditions enabling the transform see divmod_candidate_p().
4505 The pass has three parts:
4506 1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
4507 other trunc_div_expr and trunc_mod_expr stmts.
4508 2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
4510 3) Insert DIVMOD call just before top_stmt and update entries in
4511 stmts vector to use return value of DIMOVD (REALEXPR_PART for div,
4512 IMAGPART_EXPR for mod). */
4515 convert_to_divmod (gassign
*stmt
)
4517 if (stmt_can_throw_internal (cfun
, stmt
)
4518 || !divmod_candidate_p (stmt
))
4521 tree op1
= gimple_assign_rhs1 (stmt
);
4522 tree op2
= gimple_assign_rhs2 (stmt
);
4524 imm_use_iterator use_iter
;
4526 auto_vec
<gimple
*> stmts
;
4528 gimple
*top_stmt
= stmt
;
4529 basic_block top_bb
= gimple_bb (stmt
);
4531 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
4532 at-least stmt and possibly other trunc_div/trunc_mod stmts
4533 having same operands as stmt. */
4535 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, op1
)
4537 if (is_gimple_assign (use_stmt
)
4538 && (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
4539 || gimple_assign_rhs_code (use_stmt
) == TRUNC_MOD_EXPR
)
4540 && operand_equal_p (op1
, gimple_assign_rhs1 (use_stmt
), 0)
4541 && operand_equal_p (op2
, gimple_assign_rhs2 (use_stmt
), 0))
4543 if (stmt_can_throw_internal (cfun
, use_stmt
))
4546 basic_block bb
= gimple_bb (use_stmt
);
4550 if (gimple_uid (use_stmt
) < gimple_uid (top_stmt
))
4551 top_stmt
= use_stmt
;
4553 else if (dominated_by_p (CDI_DOMINATORS
, top_bb
, bb
))
4556 top_stmt
= use_stmt
;
4561 tree top_op1
= gimple_assign_rhs1 (top_stmt
);
4562 tree top_op2
= gimple_assign_rhs2 (top_stmt
);
4564 stmts
.safe_push (top_stmt
);
4565 bool div_seen
= (gimple_assign_rhs_code (top_stmt
) == TRUNC_DIV_EXPR
);
4567 /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
4568 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
4569 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
4570 2nd loop ends up adding at-least single trunc_mod_expr stmt. */
4572 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, top_op1
)
4574 if (is_gimple_assign (use_stmt
)
4575 && (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
4576 || gimple_assign_rhs_code (use_stmt
) == TRUNC_MOD_EXPR
)
4577 && operand_equal_p (top_op1
, gimple_assign_rhs1 (use_stmt
), 0)
4578 && operand_equal_p (top_op2
, gimple_assign_rhs2 (use_stmt
), 0))
4580 if (use_stmt
== top_stmt
4581 || stmt_can_throw_internal (cfun
, use_stmt
)
4582 || !dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
), top_bb
))
4585 stmts
.safe_push (use_stmt
);
4586 if (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
)
4594 /* Part 3: Create libcall to internal fn DIVMOD:
4595 divmod_tmp = DIVMOD (op1, op2). */
4597 gcall
*call_stmt
= gimple_build_call_internal (IFN_DIVMOD
, 2, op1
, op2
);
4598 tree res
= make_temp_ssa_name (build_complex_type (TREE_TYPE (op1
)),
4599 call_stmt
, "divmod_tmp");
4600 gimple_call_set_lhs (call_stmt
, res
);
4601 /* We rejected throwing statements above. */
4602 gimple_call_set_nothrow (call_stmt
, true);
4604 /* Insert the call before top_stmt. */
4605 gimple_stmt_iterator top_stmt_gsi
= gsi_for_stmt (top_stmt
);
4606 gsi_insert_before (&top_stmt_gsi
, call_stmt
, GSI_SAME_STMT
);
4608 widen_mul_stats
.divmod_calls_inserted
++;
4610 /* Update all statements in stmts vector:
4611 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
4612 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
4614 for (unsigned i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
4618 switch (gimple_assign_rhs_code (use_stmt
))
4620 case TRUNC_DIV_EXPR
:
4621 new_rhs
= fold_build1 (REALPART_EXPR
, TREE_TYPE (op1
), res
);
4624 case TRUNC_MOD_EXPR
:
4625 new_rhs
= fold_build1 (IMAGPART_EXPR
, TREE_TYPE (op1
), res
);
4632 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
4633 gimple_assign_set_rhs_from_tree (&gsi
, new_rhs
);
4634 update_stmt (use_stmt
);
4640 /* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
4641 its rhs, and try to convert it into a MULT_HIGHPART_EXPR. The return
4642 value is true iff we converted the statement. */
4645 convert_mult_to_highpart (gassign
*stmt
, gimple_stmt_iterator
*gsi
)
4647 tree lhs
= gimple_assign_lhs (stmt
);
4648 tree stype
= TREE_TYPE (lhs
);
4649 tree sarg0
= gimple_assign_rhs1 (stmt
);
4650 tree sarg1
= gimple_assign_rhs2 (stmt
);
4652 if (TREE_CODE (stype
) != INTEGER_TYPE
4653 || TREE_CODE (sarg1
) != INTEGER_CST
4654 || TREE_CODE (sarg0
) != SSA_NAME
4655 || !tree_fits_uhwi_p (sarg1
)
4656 || !has_single_use (sarg0
))
4659 gassign
*def
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (sarg0
));
4663 enum tree_code mcode
= gimple_assign_rhs_code (def
);
4664 if (mcode
== NOP_EXPR
)
4666 tree tmp
= gimple_assign_rhs1 (def
);
4667 if (TREE_CODE (tmp
) != SSA_NAME
|| !has_single_use (tmp
))
4669 def
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (tmp
));
4672 mcode
= gimple_assign_rhs_code (def
);
4675 if (mcode
!= WIDEN_MULT_EXPR
4676 || gimple_bb (def
) != gimple_bb (stmt
))
4678 tree mtype
= TREE_TYPE (gimple_assign_lhs (def
));
4679 if (TREE_CODE (mtype
) != INTEGER_TYPE
4680 || TYPE_PRECISION (mtype
) != TYPE_PRECISION (stype
))
4683 tree mop1
= gimple_assign_rhs1 (def
);
4684 tree mop2
= gimple_assign_rhs2 (def
);
4685 tree optype
= TREE_TYPE (mop1
);
4686 bool unsignedp
= TYPE_UNSIGNED (optype
);
4687 unsigned int prec
= TYPE_PRECISION (optype
);
4689 if (unsignedp
!= TYPE_UNSIGNED (mtype
)
4690 || TYPE_PRECISION (mtype
) != 2 * prec
)
4693 unsigned HOST_WIDE_INT bits
= tree_to_uhwi (sarg1
);
4694 if (bits
< prec
|| bits
>= 2 * prec
)
4697 /* For the time being, require operands to have the same sign. */
4698 if (unsignedp
!= TYPE_UNSIGNED (TREE_TYPE (mop2
)))
4701 machine_mode mode
= TYPE_MODE (optype
);
4702 optab tab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
4703 if (optab_handler (tab
, mode
) == CODE_FOR_nothing
)
4706 location_t loc
= gimple_location (stmt
);
4707 tree highpart1
= build_and_insert_binop (gsi
, loc
, "highparttmp",
4708 MULT_HIGHPART_EXPR
, mop1
, mop2
);
4709 tree highpart2
= highpart1
;
4710 tree ntype
= optype
;
4712 if (TYPE_UNSIGNED (stype
) != TYPE_UNSIGNED (optype
))
4714 ntype
= TYPE_UNSIGNED (stype
) ? unsigned_type_for (optype
)
4715 : signed_type_for (optype
);
4716 highpart2
= build_and_insert_cast (gsi
, loc
, ntype
, highpart1
);
4719 highpart2
= build_and_insert_binop (gsi
, loc
, "highparttmp",
4720 RSHIFT_EXPR
, highpart2
,
4721 build_int_cst (ntype
, bits
- prec
));
4723 gassign
*new_stmt
= gimple_build_assign (lhs
, NOP_EXPR
, highpart2
);
4724 gsi_replace (gsi
, new_stmt
, true);
4726 widen_mul_stats
.highpart_mults_inserted
++;
4730 /* If target has spaceship<MODE>3 expander, pattern recognize
4731 <bb 2> [local count: 1073741824]:
4732 if (a_2(D) == b_3(D))
4733 goto <bb 6>; [34.00%]
4735 goto <bb 3>; [66.00%]
4737 <bb 3> [local count: 708669601]:
4738 if (a_2(D) < b_3(D))
4739 goto <bb 6>; [1.04%]
4741 goto <bb 4>; [98.96%]
4743 <bb 4> [local count: 701299439]:
4744 if (a_2(D) > b_3(D))
4745 goto <bb 5>; [48.89%]
4747 goto <bb 6>; [51.11%]
4749 <bb 5> [local count: 342865295]:
4751 <bb 6> [local count: 1073741824]:
4753 <bb 2> [local count: 1073741824]:
4754 _1 = .SPACESHIP (a_2(D), b_3(D));
4756 goto <bb 6>; [34.00%]
4758 goto <bb 3>; [66.00%]
4760 <bb 3> [local count: 708669601]:
4762 goto <bb 6>; [1.04%]
4764 goto <bb 4>; [98.96%]
4766 <bb 4> [local count: 701299439]:
4768 goto <bb 5>; [48.89%]
4770 goto <bb 6>; [51.11%]
4772 <bb 5> [local count: 342865295]:
4774 <bb 6> [local count: 1073741824]:
4775 so that the backend can emit optimal comparison and
4776 conditional jump sequence. */
4779 optimize_spaceship (gcond
*stmt
)
4781 enum tree_code code
= gimple_cond_code (stmt
);
4782 if (code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4784 tree arg1
= gimple_cond_lhs (stmt
);
4785 tree arg2
= gimple_cond_rhs (stmt
);
4786 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1
))
4787 || optab_handler (spaceship_optab
,
4788 TYPE_MODE (TREE_TYPE (arg1
))) == CODE_FOR_nothing
4789 || operand_equal_p (arg1
, arg2
, 0))
4792 basic_block bb0
= gimple_bb (stmt
), bb1
, bb2
= NULL
;
4793 edge em1
= NULL
, e1
= NULL
, e2
= NULL
;
4794 bb1
= EDGE_SUCC (bb0
, 1)->dest
;
4795 if (((EDGE_SUCC (bb0
, 0)->flags
& EDGE_TRUE_VALUE
) != 0) ^ (code
== EQ_EXPR
))
4796 bb1
= EDGE_SUCC (bb0
, 0)->dest
;
4798 gcond
*g
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb1
));
4800 || !single_pred_p (bb1
)
4801 || (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
4802 ? !operand_equal_p (gimple_cond_rhs (g
), arg2
, 0)
4803 : (!operand_equal_p (gimple_cond_lhs (g
), arg2
, 0)
4804 || !operand_equal_p (gimple_cond_rhs (g
), arg1
, 0)))
4805 || !cond_only_block_p (bb1
))
4808 enum tree_code ccode
= (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
4809 ? LT_EXPR
: GT_EXPR
);
4810 switch (gimple_cond_code (g
))
4817 ccode
= ccode
== LT_EXPR
? GT_EXPR
: LT_EXPR
;
4823 for (int i
= 0; i
< 2; ++i
)
4825 /* With NaNs, </<=/>/>= are false, so we need to look for the
4826 third comparison on the false edge from whatever non-equality
4827 comparison the second comparison is. */
4828 if (HONOR_NANS (TREE_TYPE (arg1
))
4829 && (EDGE_SUCC (bb1
, i
)->flags
& EDGE_TRUE_VALUE
) != 0)
4832 bb2
= EDGE_SUCC (bb1
, i
)->dest
;
4833 g
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb2
));
4835 || !single_pred_p (bb2
)
4836 || (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
4837 ? !operand_equal_p (gimple_cond_rhs (g
), arg2
, 0)
4838 : (!operand_equal_p (gimple_cond_lhs (g
), arg2
, 0)
4839 || !operand_equal_p (gimple_cond_rhs (g
), arg1
, 0)))
4840 || !cond_only_block_p (bb2
)
4841 || EDGE_SUCC (bb2
, 0)->dest
== EDGE_SUCC (bb2
, 1)->dest
)
4844 enum tree_code ccode2
4845 = (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0) ? LT_EXPR
: GT_EXPR
);
4846 switch (gimple_cond_code (g
))
4853 ccode2
= ccode2
== LT_EXPR
? GT_EXPR
: LT_EXPR
;
4858 if (HONOR_NANS (TREE_TYPE (arg1
)) && ccode
== ccode2
)
4861 if ((ccode
== LT_EXPR
)
4862 ^ ((EDGE_SUCC (bb1
, i
)->flags
& EDGE_TRUE_VALUE
) != 0))
4864 em1
= EDGE_SUCC (bb1
, 1 - i
);
4865 e1
= EDGE_SUCC (bb2
, 0);
4866 e2
= EDGE_SUCC (bb2
, 1);
4867 if ((ccode2
== LT_EXPR
) ^ ((e1
->flags
& EDGE_TRUE_VALUE
) == 0))
4872 e1
= EDGE_SUCC (bb1
, 1 - i
);
4873 em1
= EDGE_SUCC (bb2
, 0);
4874 e2
= EDGE_SUCC (bb2
, 1);
4875 if ((ccode2
!= LT_EXPR
) ^ ((em1
->flags
& EDGE_TRUE_VALUE
) == 0))
4876 std::swap (em1
, e2
);
4883 if ((ccode
== LT_EXPR
)
4884 ^ ((EDGE_SUCC (bb1
, 0)->flags
& EDGE_TRUE_VALUE
) != 0))
4886 em1
= EDGE_SUCC (bb1
, 1);
4887 e1
= EDGE_SUCC (bb1
, 0);
4888 e2
= (e1
->flags
& EDGE_TRUE_VALUE
) ? em1
: e1
;
4892 em1
= EDGE_SUCC (bb1
, 0);
4893 e1
= EDGE_SUCC (bb1
, 1);
4894 e2
= (e1
->flags
& EDGE_TRUE_VALUE
) ? em1
: e1
;
4898 gcall
*gc
= gimple_build_call_internal (IFN_SPACESHIP
, 2, arg1
, arg2
);
4899 tree lhs
= make_ssa_name (integer_type_node
);
4900 gimple_call_set_lhs (gc
, lhs
);
4901 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4902 gsi_insert_before (&gsi
, gc
, GSI_SAME_STMT
);
4904 gimple_cond_set_lhs (stmt
, lhs
);
4905 gimple_cond_set_rhs (stmt
, integer_zero_node
);
4908 gcond
*cond
= as_a
<gcond
*> (*gsi_last_bb (bb1
));
4909 gimple_cond_set_lhs (cond
, lhs
);
4910 if (em1
->src
== bb1
&& e2
!= em1
)
4912 gimple_cond_set_rhs (cond
, integer_minus_one_node
);
4913 gimple_cond_set_code (cond
, (em1
->flags
& EDGE_TRUE_VALUE
)
4914 ? EQ_EXPR
: NE_EXPR
);
4918 gcc_assert (e1
->src
== bb1
&& e2
!= e1
);
4919 gimple_cond_set_rhs (cond
, integer_one_node
);
4920 gimple_cond_set_code (cond
, (e1
->flags
& EDGE_TRUE_VALUE
)
4921 ? EQ_EXPR
: NE_EXPR
);
4925 if (e2
!= e1
&& e2
!= em1
)
4927 cond
= as_a
<gcond
*> (*gsi_last_bb (bb2
));
4928 gimple_cond_set_lhs (cond
, lhs
);
4929 if (em1
->src
== bb2
)
4930 gimple_cond_set_rhs (cond
, integer_minus_one_node
);
4933 gcc_assert (e1
->src
== bb2
);
4934 gimple_cond_set_rhs (cond
, integer_one_node
);
4936 gimple_cond_set_code (cond
,
4937 (e2
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
);
4941 wide_int wm1
= wi::minus_one (TYPE_PRECISION (integer_type_node
));
4942 wide_int w2
= wi::two (TYPE_PRECISION (integer_type_node
));
4943 value_range
vr (TREE_TYPE (lhs
), wm1
, w2
);
4944 set_range_info (lhs
, vr
);
4948 /* Find integer multiplications where the operands are extended from
4949 smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
4950 or MULT_HIGHPART_EXPR where appropriate. */
4954 const pass_data pass_data_optimize_widening_mul
=
4956 GIMPLE_PASS
, /* type */
4957 "widening_mul", /* name */
4958 OPTGROUP_NONE
, /* optinfo_flags */
4959 TV_TREE_WIDEN_MUL
, /* tv_id */
4960 PROP_ssa
, /* properties_required */
4961 0, /* properties_provided */
4962 0, /* properties_destroyed */
4963 0, /* todo_flags_start */
4964 TODO_update_ssa
, /* todo_flags_finish */
4967 class pass_optimize_widening_mul
: public gimple_opt_pass
4970 pass_optimize_widening_mul (gcc::context
*ctxt
)
4971 : gimple_opt_pass (pass_data_optimize_widening_mul
, ctxt
)
4974 /* opt_pass methods: */
4975 bool gate (function
*) final override
4977 return flag_expensive_optimizations
&& optimize
;
4980 unsigned int execute (function
*) final override
;
4982 }; // class pass_optimize_widening_mul
4984 /* Walker class to perform the transformation in reverse dominance order. */
4986 class math_opts_dom_walker
: public dom_walker
4989 /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
4990 if walking modidifes the CFG. */
4992 math_opts_dom_walker (bool *cfg_changed_p
)
4993 : dom_walker (CDI_DOMINATORS
), m_last_result_set (),
4994 m_cfg_changed_p (cfg_changed_p
) {}
4996 /* The actual actions performed in the walk. */
4998 void after_dom_children (basic_block
) final override
;
5000 /* Set of results of chains of multiply and add statement combinations that
5001 were not transformed into FMAs because of active deferring. */
5002 hash_set
<tree
> m_last_result_set
;
5004 /* Pointer to a flag of the user that needs to be set if CFG has been
5006 bool *m_cfg_changed_p
;
5010 math_opts_dom_walker::after_dom_children (basic_block bb
)
5012 gimple_stmt_iterator gsi
;
5014 fma_deferring_state
fma_state (param_avoid_fma_max_bits
> 0);
5016 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);)
5018 gimple
*stmt
= gsi_stmt (gsi
);
5019 enum tree_code code
;
5021 if (is_gimple_assign (stmt
))
5023 code
= gimple_assign_rhs_code (stmt
);
5027 if (!convert_mult_to_widen (stmt
, &gsi
)
5028 && !convert_expand_mult_copysign (stmt
, &gsi
)
5029 && convert_mult_to_fma (stmt
,
5030 gimple_assign_rhs1 (stmt
),
5031 gimple_assign_rhs2 (stmt
),
5034 gsi_remove (&gsi
, true);
5035 release_defs (stmt
);
5038 match_arith_overflow (&gsi
, stmt
, code
, m_cfg_changed_p
);
5043 if (!convert_plusminus_to_widen (&gsi
, stmt
, code
))
5044 match_arith_overflow (&gsi
, stmt
, code
, m_cfg_changed_p
);
5048 if (match_arith_overflow (&gsi
, stmt
, code
, m_cfg_changed_p
))
5052 case TRUNC_MOD_EXPR
:
5053 convert_to_divmod (as_a
<gassign
*> (stmt
));
5057 convert_mult_to_highpart (as_a
<gassign
*> (stmt
), &gsi
);
5063 else if (is_gimple_call (stmt
))
5065 switch (gimple_call_combined_fn (stmt
))
5068 if (gimple_call_lhs (stmt
)
5069 && TREE_CODE (gimple_call_arg (stmt
, 1)) == REAL_CST
5070 && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt
, 1)),
5072 && convert_mult_to_fma (stmt
,
5073 gimple_call_arg (stmt
, 0),
5074 gimple_call_arg (stmt
, 0),
5077 unlink_stmt_vdef (stmt
);
5078 if (gsi_remove (&gsi
, true)
5079 && gimple_purge_dead_eh_edges (bb
))
5080 *m_cfg_changed_p
= true;
5081 release_defs (stmt
);
5087 if (convert_mult_to_fma (stmt
,
5088 gimple_call_arg (stmt
, 1),
5089 gimple_call_arg (stmt
, 2),
5091 gimple_call_arg (stmt
, 0)))
5094 gsi_remove (&gsi
, true);
5095 release_defs (stmt
);
5101 cancel_fma_deferring (&fma_state
);
5108 else if (gimple_code (stmt
) == GIMPLE_COND
)
5109 optimize_spaceship (as_a
<gcond
*> (stmt
));
5112 if (fma_state
.m_deferring_p
5113 && fma_state
.m_initial_phi
)
5115 gcc_checking_assert (fma_state
.m_last_result
);
5116 if (!last_fma_candidate_feeds_initial_phi (&fma_state
,
5117 &m_last_result_set
))
5118 cancel_fma_deferring (&fma_state
);
5120 m_last_result_set
.add (fma_state
.m_last_result
);
5126 pass_optimize_widening_mul::execute (function
*fun
)
5128 bool cfg_changed
= false;
5130 memset (&widen_mul_stats
, 0, sizeof (widen_mul_stats
));
5131 calculate_dominance_info (CDI_DOMINATORS
);
5132 renumber_gimple_stmt_uids (cfun
);
5134 math_opts_dom_walker (&cfg_changed
).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun
));
5136 statistics_counter_event (fun
, "widening multiplications inserted",
5137 widen_mul_stats
.widen_mults_inserted
);
5138 statistics_counter_event (fun
, "widening maccs inserted",
5139 widen_mul_stats
.maccs_inserted
);
5140 statistics_counter_event (fun
, "fused multiply-adds inserted",
5141 widen_mul_stats
.fmas_inserted
);
5142 statistics_counter_event (fun
, "divmod calls inserted",
5143 widen_mul_stats
.divmod_calls_inserted
);
5144 statistics_counter_event (fun
, "highpart multiplications inserted",
5145 widen_mul_stats
.highpart_mults_inserted
);
5147 return cfg_changed
? TODO_cleanup_cfg
: 0;
5153 make_pass_optimize_widening_mul (gcc::context
*ctxt
)
5155 return new pass_optimize_widening_mul (ctxt
);