/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments showed that the transformation is not
   always useful if there are only two divisions by the same divisor.
   This is probably because modern processors can pipeline the
   divisions; on older, in-order processors it should still be
   effective to optimize two divisions by the same number.
   We make this a param, and it shall be called N in the remainder of
   this comment.

   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, instead, we can insert
   divisions elsewhere, which will be in blocks that are common dominators
   of those that have the division).

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has less than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.cc, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
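
/* A small worked illustration of the rules above.  The CFG and the
   threshold are hypothetical and only meant as a sketch; assume N == 2:

	bb1:  if (p) goto bb2; else goto bb3;
	bb2:  t1 = a / d;  t2 = b / d;
	bb3:  t3 = c / d;

   bb2 contains two divisions by d that post-dominate its start, so with
   N == 2 a reciprocal r = 1.0 / d can be inserted there and both
   divisions become multiplications by r.  bb1 is a common dominator of
   bb2 and bb3, but neither bb2 nor bb3 post-dominates it, so bb1 is not
   credited with their divisions and no reciprocal is hoisted above the
   branch; with trapping math, insertion is additionally limited to
   blocks that already contain a division.  */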
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "optabs-libfuncs.h"
#include "tree-eh.h"
#include "targhooks.h"
#include "domwalk.h"
#include "tree-ssa-math-opts.h"
#include "dbgcnt.h"
#include "cfghooks.h"
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence
{
  /* The basic block represented by this structure.  */
  basic_block bb = basic_block();

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def = tree();

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def = tree();

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt = nullptr;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children = nullptr;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next = nullptr;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions = 0;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division = false;

  /* Construct a struct occurrence for basic block BB, and whose
     children list is headed by CHILDREN.  */
  occurrence (basic_block bb, struct occurrence *children)
  : bb (bb), children (children)
  {
    bb->aux = this;
  }

  /* Destroy a struct occurrence and remove it from its basic block.  */
  ~occurrence ()
  {
    bb->aux = nullptr;
  }

  /* Allocate memory for a struct occurrence from OCC_POOL.  */
  static void* operator new (size_t);

  /* Return memory for a struct occurrence to OCC_POOL.  */
  static void operator delete (void*, size_t);
};
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;

  /* Number of conversions removed.  */
  int conv_removed;
} sincos_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;

  /* Number of highpart multiplication ops inserted.  */
  int highpart_mults_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;
void*
occurrence::operator new (size_t n)
{
  gcc_assert (n == sizeof(occurrence));
  return occ_pool->allocate_raw ();
}

void
occurrence::operator delete (void *occ, size_t n)
{
  gcc_assert (n == sizeof(occurrence));
  occ_pool->remove_raw (occ);
}
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */

static void
insert_bb (struct occurrence *new_occ, basic_block idom,
	   struct occurrence **p_head)
{
  struct occurrence *occ, **p_occ;

  for (p_occ = p_head; (occ = *p_occ) != NULL; )
    {
      basic_block bb = new_occ->bb, occ_bb = occ->bb;
      basic_block dom = nearest_common_dominator (CDI_DOMINATORS, occ_bb, bb);
      if (dom == bb)
	{
	  /* BB dominates OCC_BB.  OCC becomes NEW_OCC's child: remove OCC
	     from its list.  */
	  *p_occ = occ->next;
	  occ->next = new_occ->children;
	  new_occ->children = occ;

	  /* Try the next block (it may as well be dominated by BB).  */
	}

      else if (dom == occ_bb)
	{
	  /* OCC_BB dominates BB.  Tail recurse to look deeper.  */
	  insert_bb (new_occ, dom, &occ->children);
	  return;
	}

      else if (dom != idom)
	{
	  gcc_assert (!dom->aux);

	  /* There is a dominator between IDOM and BB, add it and make
	     two children out of NEW_OCC and OCC.  First, remove OCC from
	     its list.  */
	  *p_occ = occ->next;
	  new_occ->next = occ;
	  occ->next = NULL;

	  /* None of the previous blocks has DOM as a dominator: if we tail
	     recursed, we would reexamine them uselessly.  Just switch BB with
	     DOM, and go on looking for blocks dominated by DOM.  */
	  new_occ = new occurrence (dom, new_occ);
	}

      else
	{
	  /* Nothing special, go on with the next element.  */
	  p_occ = &occ->next;
	}
    }

  /* No place was found as a child of IDOM.  Make BB a sibling of IDOM.  */
  new_occ->next = *p_head;
  *p_head = new_occ;
}
/* Register that we found a division in BB.
   IMPORTANCE is a measure of how much weighting to give
   that division.  Use IMPORTANCE = 2 to register a single
   division.  If the division is going to be found multiple
   times use 1 (as it is with squares).  */

static void
register_division_in (basic_block bb, int importance)
{
  struct occurrence *occ;

  occ = (struct occurrence *) bb->aux;
  if (!occ)
    {
      occ = new occurrence (bb, NULL);
      insert_bb (occ, ENTRY_BLOCK_PTR_FOR_FN (cfun), &occ_head);
    }

  occ->bb_has_division = true;
  occ->num_divisions += importance;
}
/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */

static void
compute_merit (struct occurrence *occ)
{
  struct occurrence *occ_child;
  basic_block dom = occ->bb;

  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    {
      basic_block bb;
      if (occ_child->children)
	compute_merit (occ_child);

      if (flag_exceptions)
	/* Don't clobber the EH landing pad!  */
	bb = single_noncomplex_succ (dom);
      else
	bb = dom;

      if (dominated_by_p (CDI_POST_DOMINATORS, bb, occ_child->bb))
	occ->num_divisions += occ_child->num_divisions;
    }
}
/* Return whether USE_STMT is a floating-point division by DEF.  */

static inline bool
is_division_by (gimple *use_stmt, tree def)
{
  return is_gimple_assign (use_stmt)
	 && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
	 && gimple_assign_rhs2 (use_stmt) == def
	 /* Do not recognize x / x as valid division, as we are getting
	    confused later by replacing all immediate uses x in such
	    a stmt.  */
	 && gimple_assign_rhs1 (use_stmt) != def
	 && !stmt_can_throw_internal (cfun, use_stmt);
}
/* Return TRUE if USE_STMT is a multiplication of DEF by A.  */

static inline bool
is_mult_by (gimple *use_stmt, tree def, tree a)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == MULT_EXPR)
    {
      tree op0 = gimple_assign_rhs1 (use_stmt);
      tree op1 = gimple_assign_rhs2 (use_stmt);

      return (op0 == def && op1 == a)
	     || (op0 == a && op1 == def);
    }
  return 0;
}
/* Return whether USE_STMT is DEF * DEF.  */

static inline bool
is_square_of (gimple *use_stmt, tree def)
{
  return is_mult_by (use_stmt, def, def);
}
/* Return whether USE_STMT is a floating-point division by
   DEF * DEF.  */

static inline bool
is_division_by_square (gimple *use_stmt, tree def)
{
  if (gimple_code (use_stmt) == GIMPLE_ASSIGN
      && gimple_assign_rhs_code (use_stmt) == RDIV_EXPR
      && gimple_assign_rhs1 (use_stmt) != gimple_assign_rhs2 (use_stmt)
      && !stmt_can_throw_internal (cfun, use_stmt))
    {
      tree denominator = gimple_assign_rhs2 (use_stmt);
      if (TREE_CODE (denominator) == SSA_NAME)
	return is_square_of (SSA_NAME_DEF_STMT (denominator), def);
    }
  return 0;
}
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_BSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.

   If should_insert_square_recip is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.  */

static void
insert_reciprocals (gimple_stmt_iterator *def_gsi, struct occurrence *occ,
		    tree def, tree recip_def, tree square_recip_def,
		    int should_insert_square_recip, int threshold)
{
  tree type;
  gassign *new_stmt, *new_square_stmt;
  gimple_stmt_iterator gsi;
  struct occurrence *occ_child;

  if (!recip_def
      && (occ->bb_has_division || !flag_trapping_math)
      /* Divide by two as all divisions are counted twice in
	 the costs.  */
      && occ->num_divisions / 2 >= threshold)
    {
      /* Make a variable with the replacement and substitute it.  */
      type = TREE_TYPE (def);
      recip_def = create_tmp_reg (type, "reciptmp");
      new_stmt = gimple_build_assign (recip_def, RDIV_EXPR,
				      build_one_cst (type), def);

      if (should_insert_square_recip)
	{
	  square_recip_def = create_tmp_reg (type, "powmult_reciptmp");
	  new_square_stmt = gimple_build_assign (square_recip_def, MULT_EXPR,
						 recip_def, recip_def);
	}

      if (occ->bb_has_division)
	{
	  /* Case 1: insert before an existing division.  */
	  gsi = gsi_after_labels (occ->bb);
	  while (!gsi_end_p (gsi)
		 && (!is_division_by (gsi_stmt (gsi), def))
		 && (!is_division_by_square (gsi_stmt (gsi), def)))
	    gsi_next (&gsi);

	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}
      else if (def_gsi && occ->bb == gsi_bb (*def_gsi))
	{
	  /* Case 2: insert right after the definition.  Note that this will
	     never happen if the definition statement can throw, because in
	     that case the sole successor of the statement's basic block will
	     dominate all the uses as well.  */
	  gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_after (def_gsi, new_square_stmt, GSI_NEW_STMT);
	}
      else
	{
	  /* Case 3: insert in a basic block not containing defs/uses.  */
	  gsi = gsi_after_labels (occ->bb);
	  gsi_insert_before (&gsi, new_stmt, GSI_SAME_STMT);
	  if (should_insert_square_recip)
	    gsi_insert_before (&gsi, new_square_stmt, GSI_SAME_STMT);
	}

      reciprocal_stats.rdivs_inserted++;

      occ->recip_def_stmt = new_stmt;
    }

  occ->recip_def = recip_def;
  occ->square_recip_def = square_recip_def;
  for (occ_child = occ->children; occ_child; occ_child = occ_child->next)
    insert_reciprocals (def_gsi, occ_child, def, recip_def,
			square_recip_def, should_insert_square_recip,
			threshold);
}
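
/* Worked example of the threshold arithmetic above (illustrative only;
   assume the target's min_divisions_for_recip_mul threshold is 2).
   register_division_in adds 2 per plain division and 1 for each of the
   two times a division by a square is seen, so for

	t1 = a / x;
	t2 = b / (x * x);

   in one block, num_divisions = 2 + 1 + 1 = 4 and 4 / 2 = 2 >= 2, so
   both 1.0 / x and its square are inserted and the two divisions become
   multiplications.  */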
/* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
   Take as argument the use for (x * x).  */

static inline void
replace_reciprocal_squares (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb) && occ->square_recip_def
      && occ->recip_def_stmt != use_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      gimple_assign_set_rhs2 (use_stmt, occ->square_recip_def);
      SET_USE (use_p, occ->square_recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}
/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */

static inline void
replace_reciprocal (use_operand_p use_p)
{
  gimple *use_stmt = USE_STMT (use_p);
  basic_block bb = gimple_bb (use_stmt);
  struct occurrence *occ = (struct occurrence *) bb->aux;

  if (optimize_bb_for_speed_p (bb)
      && occ->recip_def && use_stmt != occ->recip_def_stmt)
    {
      gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
      gimple_assign_set_rhs_code (use_stmt, MULT_EXPR);
      SET_USE (use_p, occ->recip_def);
      fold_stmt_inplace (&gsi);
      update_stmt (use_stmt);
    }
}
/* Free OCC and return one more "struct occurrence" to be freed.  */

static struct occurrence *
free_bb (struct occurrence *occ)
{
  struct occurrence *child, *next;

  /* First get the two pointers hanging off OCC.  */
  next = occ->next;
  child = occ->children;
  delete occ;

  /* Now ensure that we don't recurse unless it is necessary.  */
  if (!child)
    return next;
  else
    {
      while (next)
	next = free_bb (next);

      return child;
    }
}
/* Transform sequences like
   t = sqrt (a)
   x = 1.0 / t;
   r1 = x * x;
   r2 = a * x;
   into:
   t = sqrt (a)
   r1 = 1.0 / a;
   r2 = sqrt (a);
   x = r1 * r2;
   depending on the uses of x, r1, r2.  This removes one multiplication and
   allows the sqrt and division operations to execute in parallel.
   DEF_GSI is the gsi of the initial division by sqrt that defines
   DEF (x in the example above).  */

static void
optimize_recip_sqrt (gimple_stmt_iterator *def_gsi, tree def)
{
  gimple *use_stmt;
  imm_use_iterator use_iter;
  gimple *stmt = gsi_stmt (*def_gsi);
  tree x = def;
  tree orig_sqrt_ssa_name = gimple_assign_rhs2 (stmt);
  tree div_rhs1 = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (orig_sqrt_ssa_name) != SSA_NAME
      || TREE_CODE (div_rhs1) != REAL_CST
      || !real_equal (&TREE_REAL_CST (div_rhs1), &dconst1))
    return;

  gcall *sqrt_stmt
    = dyn_cast <gcall *> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name));

  if (!sqrt_stmt || !gimple_call_lhs (sqrt_stmt))
    return;

  switch (gimple_call_combined_fn (sqrt_stmt))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      break;

    default:
      return;
    }
  tree a = gimple_call_arg (sqrt_stmt, 0);

  /* We have 'a' and 'x'.  Now analyze the uses of 'x'.  */

  /* Statements that use x in x * x.  */
  auto_vec<gimple *> sqr_stmts;
  /* Statements that use x in a * x.  */
  auto_vec<gimple *> mult_stmts;
  bool has_other_use = false;
  bool mult_on_main_path = false;

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, x)
    {
      if (is_gimple_debug (use_stmt))
	continue;
      if (is_square_of (use_stmt, x))
	{
	  sqr_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else if (is_mult_by (use_stmt, x, a))
	{
	  mult_stmts.safe_push (use_stmt);
	  if (gimple_bb (use_stmt) == gimple_bb (stmt))
	    mult_on_main_path = true;
	}
      else
	has_other_use = true;
    }

  /* In the x * x and a * x cases we just rewire stmt operands or
     remove multiplications.  In the has_other_use case we introduce
     a multiplication so make sure we don't introduce a multiplication
     on a path where there was none.  */
  if (has_other_use && !mult_on_main_path)
    return;

  if (sqr_stmts.is_empty () && mult_stmts.is_empty ())
    return;

  /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
     to be able to compose it from the sqr and mult cases.  */
  if (has_other_use && (sqr_stmts.is_empty () || mult_stmts.is_empty ()))
    return;

  if (dump_file)
    {
      fprintf (dump_file, "Optimizing reciprocal sqrt multiplications of\n");
      print_gimple_stmt (dump_file, sqrt_stmt, 0, TDF_NONE);
      print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
      fprintf (dump_file, "\n");
    }

  bool delete_div = !has_other_use;
  tree sqr_ssa_name = NULL_TREE;
  if (!sqr_stmts.is_empty ())
    {
      /* r1 = x * x.  Transform the original
	 x = 1.0 / t
	 into
	 tmp1 = 1.0 / a
	 r1 = tmp1.  */

      sqr_ssa_name
	= make_temp_ssa_name (TREE_TYPE (a), NULL, "recip_sqrt_sqr");

      if (dump_file)
	{
	  fprintf (dump_file, "Replacing original division\n");
	  print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
	  fprintf (dump_file, "with new division\n");
	}
      stmt
	= gimple_build_assign (sqr_ssa_name, gimple_assign_rhs_code (stmt),
			       gimple_assign_rhs1 (stmt), a);
      gsi_insert_before (def_gsi, stmt, GSI_SAME_STMT);
      gsi_remove (def_gsi, true);
      *def_gsi = gsi_for_stmt (stmt);
      fold_stmt_inplace (def_gsi);
      update_stmt (stmt);

      if (dump_file)
	print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);

      delete_div = false;
      unsigned int i;
      gimple *sqr_stmt;
      FOR_EACH_VEC_ELT (sqr_stmts, i, sqr_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (sqr_stmt);
	  gimple_assign_set_rhs_from_tree (&gsi2, sqr_ssa_name);
	  update_stmt (sqr_stmt);
	}
    }
  if (!mult_stmts.is_empty ())
    {
      /* r2 = a * x.  Transform this into:
	 r2 = t (The original sqrt (a)).  */
      unsigned int i;
      gimple *mult_stmt = NULL;
      FOR_EACH_VEC_ELT (mult_stmts, i, mult_stmt)
	{
	  gimple_stmt_iterator gsi2 = gsi_for_stmt (mult_stmt);

	  if (dump_file)
	    {
	      fprintf (dump_file, "Replacing squaring multiplication\n");
	      print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	      fprintf (dump_file, "with assignment\n");
	    }
	  gimple_assign_set_rhs_from_tree (&gsi2, orig_sqrt_ssa_name);
	  fold_stmt_inplace (&gsi2);
	  update_stmt (mult_stmt);
	  if (dump_file)
	    print_gimple_stmt (dump_file, mult_stmt, 0, TDF_NONE);
	}
    }

  if (has_other_use)
    {
      /* Using the two temporaries tmp1, tmp2 from above
	 the original x is now:
	 x = tmp1 * tmp2.  */
      gcc_assert (orig_sqrt_ssa_name);
      gcc_assert (sqr_ssa_name);

      gimple *new_stmt
	= gimple_build_assign (x, MULT_EXPR,
			       orig_sqrt_ssa_name, sqr_ssa_name);
      gsi_insert_after (def_gsi, new_stmt, GSI_NEW_STMT);
    }
  else if (delete_div)
    {
      /* Remove the original division.  */
      gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
      gsi_remove (&gsi2, true);
      release_defs (stmt);
    }
  else
    release_ssa_name (x);
}
/* Look for floating-point divisions among DEF's uses, and try to
   replace them by multiplications with the reciprocal.  Add
   as many statements computing the reciprocal as needed.

   DEF must be a GIMPLE register of a floating-point type.  */

static void
execute_cse_reciprocals_1 (gimple_stmt_iterator *def_gsi, tree def)
{
  use_operand_p use_p, square_use_p;
  imm_use_iterator use_iter, square_use_iter;
  tree square_def;
  struct occurrence *occ;
  int count = 0;
  int threshold;
  int square_recip_count = 0;
  int sqrt_recip_count = 0;

  gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def)) && TREE_CODE (def) == SSA_NAME);
  threshold = targetm.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def)));

  /* If DEF is a square (x * x), count the number of divisions by x.
     If there are more divisions by x than by (DEF * DEF), prefer to optimize
     the reciprocal of x instead of DEF.  This improves cases like:
       def = x * x
       t1 = a / def
       t2 = b / x
     Reciprocal optimization of x results in 1 division rather than 2 or 3.  */
  gimple *def_stmt = SSA_NAME_DEF_STMT (def);
  if (is_gimple_assign (def_stmt)
      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME
      && gimple_assign_rhs1 (def_stmt) == gimple_assign_rhs2 (def_stmt))
    {
      tree op0 = gimple_assign_rhs1 (def_stmt);

      FOR_EACH_IMM_USE_FAST (use_p, use_iter, op0)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_division_by (use_stmt, op0))
	    sqrt_recip_count++;
	}
    }

  FOR_EACH_IMM_USE_FAST (use_p, use_iter, def)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_division_by (use_stmt, def))
	{
	  register_division_in (gimple_bb (use_stmt), 2);
	  count++;
	}

      if (is_square_of (use_stmt, def))
	{
	  square_def = gimple_assign_lhs (use_stmt);
	  FOR_EACH_IMM_USE_FAST (square_use_p, square_use_iter, square_def)
	    {
	      gimple *square_use_stmt = USE_STMT (square_use_p);
	      if (is_division_by (square_use_stmt, square_def))
		{
		  /* This is executed twice for each division by a square.  */
		  register_division_in (gimple_bb (square_use_stmt), 1);
		  square_recip_count++;
		}
	    }
	}
    }

  /* Square reciprocals were counted twice above.  */
  square_recip_count /= 2;

  /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x).  */
  if (sqrt_recip_count > square_recip_count)
    goto out;

  /* Do the expensive part only if we can hope to optimize something.  */
  if (count + square_recip_count >= threshold && count >= 1)
    {
      gimple *use_stmt;
      for (occ = occ_head; occ; occ = occ->next)
	{
	  compute_merit (occ);
	  insert_reciprocals (def_gsi, occ, def, NULL, NULL,
			      square_recip_count, threshold);
	}

      FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, def)
	{
	  if (is_division_by (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		replace_reciprocal (use_p);
	    }
	  else if (square_recip_count > 0 && is_square_of (use_stmt, def))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter)
		{
		  /* Find all uses of the square that are divisions and
		     replace them by multiplications with the inverse.  */
		  imm_use_iterator square_iterator;
		  gimple *powmult_use_stmt = USE_STMT (use_p);
		  tree powmult_def_name = gimple_assign_lhs (powmult_use_stmt);

		  FOR_EACH_IMM_USE_STMT (powmult_use_stmt,
					 square_iterator, powmult_def_name)
		    FOR_EACH_IMM_USE_ON_STMT (square_use_p, square_iterator)
		      {
			gimple *powmult_use_stmt = USE_STMT (square_use_p);
			if (is_division_by (powmult_use_stmt,
					    powmult_def_name))
			  replace_reciprocal_squares (square_use_p);
		      }
		}
	    }
	}
    }

out:
  for (occ = occ_head; occ; )
    occ = free_bb (occ);

  occ_head = NULL;
}
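
/* For illustration (hypothetical GIMPLE, not tied to any particular
   target threshold): given

	x2 = x * x;
	t1 = a / x2;
	t2 = b / x;

   the counting above prefers the reciprocal of x, producing roughly

	r = 1.0 / x;
	r2 = r * r;
	t1 = a * r2;
	t2 = b * r;

   i.e. one division feeds both the plain and the squared uses.  */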
/* Return an internal function that implements the reciprocal of CALL,
   or IFN_LAST if there is no such function that the target supports.  */

internal_fn
internal_fn_reciprocal (gcall *call)
{
  internal_fn ifn;

  switch (gimple_call_combined_fn (call))
    {
    CASE_CFN_SQRT:
    CASE_CFN_SQRT_FN:
      ifn = IFN_RSQRT;
      break;

    default:
      return IFN_LAST;
    }

  tree_pair types = direct_internal_fn_types (ifn, call);
  if (!direct_internal_fn_supported_p (ifn, types, OPTIMIZE_FOR_SPEED))
    return IFN_LAST;

  return ifn;
}
/* Go through all the floating-point SSA_NAMEs, and call
   execute_cse_reciprocals_1 on each of them.  */
namespace {

const pass_data pass_data_cse_reciprocals =
{
  GIMPLE_PASS, /* type */
  "recip", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_RECIP, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_cse_reciprocals : public gimple_opt_pass
{
public:
  pass_cse_reciprocals (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_cse_reciprocals, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
  {
    return optimize && flag_reciprocal_math;
  }

  unsigned int execute (function *) final override;

}; // class pass_cse_reciprocals
unsigned int
pass_cse_reciprocals::execute (function *fun)
{
  basic_block bb;
  tree arg;

  occ_pool = new object_allocator<occurrence> ("dominators for recip");

  memset (&reciprocal_stats, 0, sizeof (reciprocal_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_POST_DOMINATORS);

  if (flag_checking)
    FOR_EACH_BB_FN (bb, fun)
      gcc_assert (!bb->aux);

  for (arg = DECL_ARGUMENTS (fun->decl); arg; arg = DECL_CHAIN (arg))
    if (FLOAT_TYPE_P (TREE_TYPE (arg))
	&& is_gimple_reg (arg))
      {
	tree name = ssa_default_def (fun, arg);
	if (name)
	  execute_cse_reciprocals_1 (NULL, name);
      }

  FOR_EACH_BB_FN (bb, fun)
    {
      tree def;

      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  def = PHI_RESULT (phi);
	  if (! virtual_operand_p (def)
	      && FLOAT_TYPE_P (TREE_TYPE (def)))
	    execute_cse_reciprocals_1 (NULL, def);
	}

      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_lhs (stmt)
	      && (def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF)) != NULL
	      && FLOAT_TYPE_P (TREE_TYPE (def))
	      && TREE_CODE (def) == SSA_NAME)
	    {
	      execute_cse_reciprocals_1 (&gsi, def);
	      stmt = gsi_stmt (gsi);
	      if (flag_unsafe_math_optimizations
		  && is_gimple_assign (stmt)
		  && gimple_assign_lhs (stmt) == def
		  && !stmt_can_throw_internal (cfun, stmt)
		  && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
		optimize_recip_sqrt (&gsi, def);
	    }
	}

      if (optimize_bb_for_size_p (bb))
	continue;

      /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b).  */
      for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == RDIV_EXPR)
	    {
	      tree arg1 = gimple_assign_rhs2 (stmt);
	      gimple *stmt1;

	      if (TREE_CODE (arg1) != SSA_NAME)
		continue;

	      stmt1 = SSA_NAME_DEF_STMT (arg1);

	      if (is_gimple_call (stmt1)
		  && gimple_call_lhs (stmt1))
		{
		  bool fail = false;
		  imm_use_iterator ui;
		  use_operand_p use_p;
		  tree fndecl = NULL_TREE;

		  gcall *call = as_a <gcall *> (stmt1);
		  internal_fn ifn = internal_fn_reciprocal (call);
		  if (ifn == IFN_LAST)
		    {
		      fndecl = gimple_call_fndecl (call);
		      if (!fndecl
			  || !fndecl_built_in_p (fndecl, BUILT_IN_MD))
			continue;
		      fndecl = targetm.builtin_reciprocal (fndecl);
		      if (!fndecl)
			continue;
		    }

		  /* Check that all uses of the SSA name are divisions,
		     otherwise replacing the defining statement will do
		     the wrong thing.  */
		  FOR_EACH_IMM_USE_FAST (use_p, ui, arg1)
		    {
		      gimple *stmt2 = USE_STMT (use_p);
		      if (is_gimple_debug (stmt2))
			continue;
		      if (!is_gimple_assign (stmt2)
			  || gimple_assign_rhs_code (stmt2) != RDIV_EXPR
			  || gimple_assign_rhs1 (stmt2) == arg1
			  || gimple_assign_rhs2 (stmt2) != arg1)
			{
			  fail = true;
			  break;
			}
		    }
		  if (fail)
		    continue;

		  gimple_replace_ssa_lhs (call, arg1);
		  if (gimple_call_internal_p (call) != (ifn != IFN_LAST))
		    {
		      auto_vec<tree, 4> args;
		      for (unsigned int i = 0;
			   i < gimple_call_num_args (call); i++)
			args.safe_push (gimple_call_arg (call, i));
		      gcall *stmt2;
		      if (ifn == IFN_LAST)
			stmt2 = gimple_build_call_vec (fndecl, args);
		      else
			stmt2 = gimple_build_call_internal_vec (ifn, args);
		      gimple_call_set_lhs (stmt2, arg1);
		      gimple_move_vops (stmt2, call);
		      gimple_call_set_nothrow (stmt2,
					       gimple_call_nothrow_p (call));
		      gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
		      gsi_replace (&gsi2, stmt2, true);
		    }
		  else
		    {
		      if (ifn == IFN_LAST)
			gimple_call_set_fndecl (call, fndecl);
		      else
			gimple_call_set_internal_fn (call, ifn);
		    }
		  reciprocal_stats.rfuncs_inserted++;

		  FOR_EACH_IMM_USE_STMT (stmt, ui, arg1)
		    {
		      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
		      gimple_assign_set_rhs_code (stmt, MULT_EXPR);
		      fold_stmt_inplace (&gsi);
		      update_stmt (stmt);
		    }
		}
	    }
	}
    }

  statistics_counter_event (fun, "reciprocal divs inserted",
			    reciprocal_stats.rdivs_inserted);
  statistics_counter_event (fun, "reciprocal functions inserted",
			    reciprocal_stats.rfuncs_inserted);

  free_dominance_info (CDI_DOMINATORS);
  free_dominance_info (CDI_POST_DOMINATORS);
  delete occ_pool;
  return 0;
}
} // anon namespace

gimple_opt_pass *
make_pass_cse_reciprocals (gcc::context *ctxt)
{
  return new pass_cse_reciprocals (ctxt);
}
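
/* Illustrative sketch of the a/func(b) rewrite performed by the pass
   above (hypothetical GIMPLE; only applies when the target supports a
   reciprocal form, e.g. a reciprocal square root internal function):

	t = sqrtf (b);
	x = a / t;

   becomes roughly

	t = .RSQRT (b);
	x = a * t;

   after the call's result is redefined to hold 1/sqrt(b) and every
   division by it is turned into a multiplication.  */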
/* If NAME is the result of a type conversion, look for other
   equivalent dominating or dominated conversions, and replace all
   uses with the earliest dominating name, removing the redundant
   conversions.  Return the prevailing name.  */

static tree
execute_cse_conv_1 (tree name, bool *cfg_changed)
{
  if (SSA_NAME_IS_DEFAULT_DEF (name)
      || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
    return name;

  gimple *def_stmt = SSA_NAME_DEF_STMT (name);

  if (!gimple_assign_cast_p (def_stmt))
    return name;

  tree src = gimple_assign_rhs1 (def_stmt);

  if (TREE_CODE (src) != SSA_NAME)
    return name;

  imm_use_iterator use_iter;
  gimple *use_stmt;

  /* Find the earliest dominating def.	*/
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      bool use_dominates;
      if (gimple_bb (def_stmt) == gimple_bb (use_stmt))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  while (!gsi_end_p (gsi) && gsi_stmt (gsi) != def_stmt)
	    gsi_next (&gsi);
	  use_dominates = !gsi_end_p (gsi);
	}
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt),
			       gimple_bb (def_stmt)))
	use_dominates = false;
      else if (dominated_by_p (CDI_DOMINATORS, gimple_bb (def_stmt),
			       gimple_bb (use_stmt)))
	use_dominates = true;
      else
	continue;

      if (use_dominates)
	{
	  std::swap (name, lhs);
	  std::swap (def_stmt, use_stmt);
	}
    }

  /* Now go through all uses of SRC again, replacing the equivalent
     dominated conversions.  We may replace defs that were not
     dominated by the then-prevailing defs when we first visited
     them.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src)
    {
      if (use_stmt == def_stmt
	  || !gimple_assign_cast_p (use_stmt))
	continue;

      tree lhs = gimple_assign_lhs (use_stmt);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs)
	  || (gimple_assign_rhs1 (use_stmt)
	      != gimple_assign_rhs1 (def_stmt))
	  || !types_compatible_p (TREE_TYPE (name), TREE_TYPE (lhs)))
	continue;

      basic_block use_bb = gimple_bb (use_stmt);
      if (gimple_bb (def_stmt) == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, gimple_bb (def_stmt)))
	{
	  sincos_stats.conv_removed++;

	  gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
	  replace_uses_by (lhs, name);
	  if (gsi_remove (&gsi, true)
	      && gimple_purge_dead_eh_edges (use_bb))
	    *cfg_changed = true;
	  release_defs (use_stmt);
	}
    }

  return name;
}
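
/* Sketch of the effect (hypothetical GIMPLE): if both

	_1 = (float) d_5;	<-- dominating conversion
	...
	_2 = (float) d_5;	<-- dominated, same source and type

   appear, the dominated conversion is removed and every use of _2 is
   rewritten to use _1, so the sin/cos/cexpi CSE below sees one name.  */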
/* Records an occurrence at statement USE_STMT in the vector of trees
   STMTS if it is dominated by *TOP_BB or dominates it or this basic block
   is not yet initialized.  Returns true if the occurrence was pushed on
   the vector.  Adjusts *TOP_BB to be the basic block dominating all
   statements in the vector.  */

static bool
maybe_record_sincos (vec<gimple *> *stmts,
		     basic_block *top_bb, gimple *use_stmt)
{
  basic_block use_bb = gimple_bb (use_stmt);
  if (*top_bb
      && (*top_bb == use_bb
	  || dominated_by_p (CDI_DOMINATORS, use_bb, *top_bb)))
    stmts->safe_push (use_stmt);
  else if (!*top_bb
	   || dominated_by_p (CDI_DOMINATORS, *top_bb, use_bb))
    {
      stmts->safe_push (use_stmt);
      *top_bb = use_bb;
    }
  else
    return false;

  return true;
}
/* Look for sin, cos and cexpi calls with the same argument NAME and
   create a single call to cexpi CSEing the result in this case.
   We first walk over all immediate uses of the argument collecting
   statements that we can CSE in a vector and in a second pass replace
   the statement rhs with a REALPART or IMAGPART expression on the
   result of the cexpi call we insert before the use statement that
   dominates all other candidates.  */

static bool
execute_cse_sincos_1 (tree name)
{
  gimple_stmt_iterator gsi;
  imm_use_iterator use_iter;
  tree fndecl, res, type = NULL_TREE;
  gimple *def_stmt, *use_stmt, *stmt;
  int seen_cos = 0, seen_sin = 0, seen_cexpi = 0;
  auto_vec<gimple *> stmts;
  basic_block top_bb = NULL;
  int i;
  bool cfg_changed = false;

  name = execute_cse_conv_1 (name, &cfg_changed);

  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name)
    {
      if (gimple_code (use_stmt) != GIMPLE_CALL
	  || !gimple_call_lhs (use_stmt))
	continue;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_SIN:
	  seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	CASE_CFN_CEXPI:
	  seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0;
	  break;

	default:
	  continue;
	}

      tree t = mathfn_built_in_type (gimple_call_combined_fn (use_stmt));
      if (!t)
	t = TREE_TYPE (name);

      /* This checks that NAME has the right type in the first round,
	 and, in subsequent rounds, that the built_in type is the same
	 type, or a compatible type.  */
      if (type != t && !types_compatible_p (type, t))
	return false;
      type = t;
    }

  if (seen_cos + seen_sin + seen_cexpi <= 1)
    return false;

  /* Simply insert cexpi at the beginning of top_bb but not earlier than
     the name def statement.  */
  fndecl = mathfn_built_in (type, BUILT_IN_CEXPI);
  if (!fndecl)
    return false;
  stmt = gimple_build_call (fndecl, 1, name);
  res = make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl)), stmt, "sincostmp");
  gimple_call_set_lhs (stmt, res);

  def_stmt = SSA_NAME_DEF_STMT (name);
  if (!SSA_NAME_IS_DEFAULT_DEF (name)
      && gimple_code (def_stmt) != GIMPLE_PHI
      && gimple_bb (def_stmt) == top_bb)
    {
      gsi = gsi_for_stmt (def_stmt);
      gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
    }
  else
    {
      gsi = gsi_after_labels (top_bb);
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    }
  sincos_stats.inserted++;

  /* And adjust the recorded old call sites.  */
  for (i = 0; stmts.iterate (i, &use_stmt); ++i)
    {
      tree rhs = NULL_TREE;

      switch (gimple_call_combined_fn (use_stmt))
	{
	CASE_CFN_COS:
	  rhs = fold_build1 (REALPART_EXPR, type, res);
	  break;

	CASE_CFN_SIN:
	  rhs = fold_build1 (IMAGPART_EXPR, type, res);
	  break;

	CASE_CFN_CEXPI:
	  rhs = res;
	  break;

	default:
	  gcc_unreachable ();
	}

      /* Replace call with a copy.  */
      stmt = gimple_build_assign (gimple_call_lhs (use_stmt), rhs);

      gsi = gsi_for_stmt (use_stmt);
      gsi_replace (&gsi, stmt, true);
      if (gimple_purge_dead_eh_edges (gimple_bb (stmt)))
	cfg_changed = true;
    }

  return cfg_changed;
}
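
/* Illustrative sketch of the transformation above (hypothetical GIMPLE):

	s_2 = sin (a_1);
	c_3 = cos (a_1);

   becomes

	sincostmp_4 = cexpi (a_1);
	s_2 = IMAGPART_EXPR <sincostmp_4>;
	c_3 = REALPART_EXPR <sincostmp_4>;

   with the cexpi call inserted before the dominating use.  */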
/* To evaluate powi(x,n), the floating point value x raised to the
   constant integer exponent n, we use a hybrid algorithm that
   combines the "window method" with look-up tables.  For an
   introduction to exponentiation algorithms and "addition chains",
   see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
   "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
   3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
   Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998.  */

/* Provide a default value for POWI_MAX_MULTS, the maximum number of
   multiplications to inline before calling the system library's pow
   function.  powi(x,n) requires at worst 2*bits(n)-2 multiplications,
   so this default never requires calling pow, powf or powl.  */

#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS  (2*HOST_BITS_PER_WIDE_INT-2)
#endif

/* The size of the "optimal power tree" lookup table.  All
   exponents less than this value are simply looked up in the
   powi_table below.  This threshold is also used to size the
   cache of pseudo registers that hold intermediate results.  */
#define POWI_TABLE_SIZE 256

/* The size, in bits of the window, used in the "window method"
   exponentiation algorithm.  This is equivalent to a radix of
   (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method".  */
#define POWI_WINDOW_SIZE 3
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical algorithms".  */

static const unsigned char powi_table[POWI_TABLE_SIZE] =
  {
      0,   1,   1,   2,   2,   3,   3,   4,  /*   0 -   7 */
      4,   6,   5,   6,   6,  10,   7,   9,  /*   8 -  15 */
      8,  16,   9,  16,  10,  12,  11,  13,  /*  16 -  23 */
     12,  17,  13,  18,  14,  24,  15,  26,  /*  24 -  31 */
     16,  17,  17,  19,  18,  33,  19,  26,  /*  32 -  39 */
     20,  25,  21,  40,  22,  27,  23,  44,  /*  40 -  47 */
     24,  32,  25,  34,  26,  29,  27,  44,  /*  48 -  55 */
     28,  31,  29,  34,  30,  60,  31,  36,  /*  56 -  63 */
     32,  64,  33,  34,  34,  46,  35,  37,  /*  64 -  71 */
     36,  65,  37,  50,  38,  48,  39,  69,  /*  72 -  79 */
     40,  49,  41,  43,  42,  51,  43,  58,  /*  80 -  87 */
     44,  64,  45,  47,  46,  59,  47,  76,  /*  88 -  95 */
     48,  65,  49,  66,  50,  67,  51,  66,  /*  96 - 103 */
     52,  70,  53,  74,  54, 104,  55,  74,  /* 104 - 111 */
     56,  64,  57,  69,  58,  78,  59,  68,  /* 112 - 119 */
     60,  61,  61,  80,  62,  75,  63,  68,  /* 120 - 127 */
     64,  65,  65, 128,  66, 129,  67,  90,  /* 128 - 135 */
     68,  73,  69, 131,  70,  94,  71,  88,  /* 136 - 143 */
     72, 128,  73,  98,  74, 132,  75, 121,  /* 144 - 151 */
     76, 102,  77, 124,  78, 132,  79, 106,  /* 152 - 159 */
     80,  97,  81, 160,  82,  99,  83, 134,  /* 160 - 167 */
     84,  86,  85,  95,  86, 160,  87, 100,  /* 168 - 175 */
     88, 113,  89,  98,  90, 107,  91, 122,  /* 176 - 183 */
     92, 111,  93, 102,  94, 126,  95, 150,  /* 184 - 191 */
     96, 128,  97, 130,  98, 133,  99, 195,  /* 192 - 199 */
    100, 128, 101, 123, 102, 164, 103, 138,  /* 200 - 207 */
    104, 145, 105, 146, 106, 109, 107, 149,  /* 208 - 215 */
    108, 200, 109, 146, 110, 170, 111, 157,  /* 216 - 223 */
    112, 128, 113, 130, 114, 182, 115, 132,  /* 224 - 231 */
    116, 200, 117, 132, 118, 158, 119, 206,  /* 232 - 239 */
    120, 240, 121, 162, 122, 147, 123, 152,  /* 240 - 247 */
    124, 166, 125, 214, 126, 138, 127, 153,  /* 248 - 255 */
  };
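
/* Worked example (illustrative, derived directly from the table):
   powi_table[7] == 4, so x**7 is evaluated as x**4 * x**3.  Expanding
   recursively and reusing cached intermediate results gives

	t2 = x * x
	t3 = t2 * x
	t4 = t2 * t2
	x7 = t4 * t3

   i.e. four multiplications, which is what powi_cost (7) returns.  */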
/* Return the number of multiplications required to calculate
   powi(x,n) where n is less than POWI_TABLE_SIZE.  This is a
   subroutine of powi_cost.  CACHE is an array indicating
   which exponents have already been calculated.  */

static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  /* If we've already calculated this exponent, then this evaluation
     doesn't require any additional multiplications.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  return powi_lookup_cost (n - powi_table[n], cache)
	 + powi_lookup_cost (powi_table[n], cache) + 1;
}
/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  This
   function needs to be kept in sync with powi_as_mults below.  */

static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  */
  val = absu_hwi (n);

  /* Initialize the exponent cache.  */
  memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
	{
	  digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
	  result += powi_lookup_cost (digit, cache)
		    + POWI_WINDOW_SIZE + 1;
	  val >>= POWI_WINDOW_SIZE;
	}
      else
	{
	  val >>= 1;
	  result += 1;
	}
    }

  return result + powi_lookup_cost (val, cache);
}
/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N, with type TYPE.  */

static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
		 unsigned HOST_WIDE_INT n, tree *cache)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gassign *mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_temp_ssa_name (type, NULL, "powmult");

  if (n < POWI_TABLE_SIZE)
    {
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache);
    }
  else if (n & 1)
    {
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache);
    }
  else
    {
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign (ssa_target, MULT_EXPR, op0, op1);
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}
/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   This function needs to be kept in sync with powi_cost above.  */

tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
	       tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0);
  gassign *div_stmt;
  tree target;

  if (n == 0)
    return build_one_cst (type);

  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  result = powi_as_mults_1 (gsi, loc, type, absu_hwi (n), cache);
  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_temp_ssa_name (type, NULL, "powmult");
  div_stmt = gimple_build_assign (target, RDIV_EXPR,
				  build_real (type, dconst1), result);
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}
/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */

static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
			    tree arg0, HOST_WIDE_INT n)
{
  if ((n >= -1 && n <= 2)
      || (optimize_function_for_speed_p (cfun)
	  && powi_cost (n) <= POWI_MAX_MULTS))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}
/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */

static tree
build_and_insert_call (gimple_stmt_iterator *gsi, location_t loc,
		       tree fn, tree arg)
{
  gcall *call_stmt;
  tree ssa_target;

  call_stmt = gimple_build_call (fn, 1, arg);
  ssa_target = make_temp_ssa_name (TREE_TYPE (arg), NULL, "powroot");
  gimple_set_lhs (call_stmt, ssa_target);
  gimple_set_location (call_stmt, loc);
  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);

  return ssa_target;
}
/* Build a gimple binary operation with the given CODE and arguments
   ARG0, ARG1, assigning the result to a new SSA name built from NAME.
   Insert the statement prior to GSI's current position, and return
   the fresh SSA name.  */

static tree
build_and_insert_binop (gimple_stmt_iterator *gsi, location_t loc,
			const char *name, enum tree_code code,
			tree arg0, tree arg1)
{
  tree result = make_temp_ssa_name (TREE_TYPE (arg0), NULL, name);
  gassign *stmt = gimple_build_assign (result, code, arg0, arg1);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}
/* Build a gimple assignment to cast VAL to TYPE.  Insert the statement
   prior to GSI's current position, and return the fresh SSA name.  */

static tree
build_and_insert_cast (gimple_stmt_iterator *gsi, location_t loc,
		       tree type, tree val)
{
  tree result = make_ssa_name (type);
  gassign *stmt = gimple_build_assign (result, NOP_EXPR, val);
  gimple_set_location (stmt, loc);
  gsi_insert_before (gsi, stmt, GSI_SAME_STMT);

  return result;
}
struct pow_synth_sqrt_info
{
  bool *factors;
  unsigned int deepest;
  unsigned int num_mults;
};

/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */

static bool
representable_as_half_series_p (REAL_VALUE_TYPE c, unsigned n,
				struct pow_synth_sqrt_info *info)
{
  REAL_VALUE_TYPE factor = dconsthalf;
  REAL_VALUE_TYPE remainder = c;

  info->deepest = 0;
  info->num_mults = 0;
  memset (info->factors, 0, n * sizeof (bool));

  for (unsigned i = 0; i < n; i++)
    {
      REAL_VALUE_TYPE res;

      /* If something inexact happened bail out now.  */
      if (real_arithmetic (&res, MINUS_EXPR, &remainder, &factor))
	return false;

      /* We have hit zero.  The number is representable as a sum
	 of powers of 0.5.  */
      if (real_equal (&res, &dconst0))
	{
	  info->factors[i] = true;
	  info->deepest = i + 1;
	  return true;
	}
      else if (!REAL_VALUE_NEGATIVE (res))
	{
	  remainder = res;
	  info->factors[i] = true;
	  info->num_mults++;
	}
      else
	info->factors[i] = false;

      real_arithmetic (&factor, MULT_EXPR, &factor, &dconsthalf);
    }
  return false;
}
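
/* Worked example (illustrative): c = 0.625 = 0.5 + 0.125, so with
   n >= 3 the factors are {1, 0, 1}, deepest == 3 and num_mults == 1,
   and pow (x, 0.625) can be synthesized as
   sqrt (x) * sqrt (sqrt (sqrt (x))).  */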
/* Return the tree corresponding to FN being applied
   to ARG N times at GSI and LOC.
   Look up previous results from CACHE if need be.
   cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times.  */

static tree
get_fn_chain (tree arg, unsigned int n, gimple_stmt_iterator *gsi,
	      tree fn, location_t loc, tree *cache)
{
  tree res = cache[n];
  if (!res)
    {
      tree prev = get_fn_chain (arg, n - 1, gsi, fn, loc, cache);
      res = build_and_insert_call (gsi, loc, fn, prev);
      cache[n] = res;
    }

  return res;
}
/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   foo (foo (x)).  */

static void
print_nested_fn (FILE* stream, const char *fname, const char* arg,
		 unsigned int n)
{
  if (n == 0)
    fprintf (stream, "%s", arg);
  else
    {
      fprintf (stream, "%s (", fname);
      print_nested_fn (stream, fname, arg, n - 1);
      fprintf (stream, ")");
    }
}
/* Print to STREAM the fractional sequence of sqrt chains
   applied to ARG, described by INFO.  Used for the dump file.  */

static void
dump_fractional_sqrt_sequence (FILE *stream, const char *arg,
			       struct pow_synth_sqrt_info *info)
{
  for (unsigned int i = 0; i < info->deepest; i++)
    {
      bool is_set = info->factors[i];
      if (is_set)
	{
	  print_nested_fn (stream, "sqrt", arg, i + 1);
	  if (i != info->deepest - 1)
	    fprintf (stream, " * ");
	}
    }
}
, " * ");
1752 /* Print to STREAM a representation of raising ARG to an integer
1753 power N. Used for the dump file. */
1756 dump_integer_part (FILE *stream
, const char* arg
, HOST_WIDE_INT n
)
1759 fprintf (stream
, "powi (%s, " HOST_WIDE_INT_PRINT_DEC
")", arg
, n
);
1761 fprintf (stream
, "%s", arg
);
1764 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1765 square roots. Place at GSI and LOC. Limit the maximum depth
1766 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1767 result of the expanded sequence or NULL_TREE if the expansion failed.
1769 This routine assumes that ARG1 is a real number with a fractional part
1770 (the integer exponent case will have been handled earlier in
1771 gimple_expand_builtin_pow).
1774 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1775 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1776 FRAC_PART == ARG1 - WHOLE_PART:
1777 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1778 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1779 if it can be expressed as such, that is if FRAC_PART satisfies:
1780 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1781 where integer a[i] is either 0 or 1.
1784 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1785 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1787 For ARG1 < 0.0 there are two approaches:
1788 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1789 is calculated as above.
1792 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1793 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1795 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1796 FRAC_PART := ARG1 - WHOLE_PART
1797 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1799 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1800 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1802 For ARG1 < 0.0 we choose between (A) and (B) depending on
1803 how many multiplications we'd have to do.
1804 So, for the example in (B): POW (x, -5.875), if we were to
1805 follow algorithm (A) we would produce:
1806 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1807 which contains more multiplications than approach (B).
1809 Hopefully, this approach will eliminate potentially expensive POW library
1810 calls when unsafe floating point math is enabled and allow the compiler to
1811 further optimise the multiplies, square roots and divides produced by this
1815 expand_pow_as_sqrts (gimple_stmt_iterator
*gsi
, location_t loc
,
1816 tree arg0
, tree arg1
, HOST_WIDE_INT max_depth
)
1818 tree type
= TREE_TYPE (arg0
);
1819 machine_mode mode
= TYPE_MODE (type
);
1820 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
1821 bool one_over
= true;
1826 if (TREE_CODE (arg1
) != REAL_CST
)
1829 REAL_VALUE_TYPE exp_init
= TREE_REAL_CST (arg1
);
1831 gcc_assert (max_depth
> 0);
1832 tree
*cache
= XALLOCAVEC (tree
, max_depth
+ 1);
1834 struct pow_synth_sqrt_info synth_info
;
1835 synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1836 synth_info
.deepest
= 0;
1837 synth_info
.num_mults
= 0;
1839 bool neg_exp
= REAL_VALUE_NEGATIVE (exp_init
);
1840 REAL_VALUE_TYPE exp
= real_value_abs (&exp_init
);
1842 /* The whole and fractional parts of exp. */
1843 REAL_VALUE_TYPE whole_part
;
1844 REAL_VALUE_TYPE frac_part
;
1846 real_floor (&whole_part
, mode
, &exp
);
1847 real_arithmetic (&frac_part
, MINUS_EXPR
, &exp
, &whole_part
);
1850 REAL_VALUE_TYPE ceil_whole
= dconst0
;
1851 REAL_VALUE_TYPE ceil_fract
= dconst0
;
1855 real_ceil (&ceil_whole
, mode
, &exp
);
1856 real_arithmetic (&ceil_fract
, MINUS_EXPR
, &ceil_whole
, &exp
);
1859 if (!representable_as_half_series_p (frac_part
, max_depth
, &synth_info
))
1862 /* Check whether it's more profitable to not use 1.0 / ... */
1865 struct pow_synth_sqrt_info alt_synth_info
;
1866 alt_synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1867 alt_synth_info
.deepest
= 0;
1868 alt_synth_info
.num_mults
= 0;
1870 if (representable_as_half_series_p (ceil_fract
, max_depth
,
1872 && alt_synth_info
.deepest
<= synth_info
.deepest
1873 && alt_synth_info
.num_mults
< synth_info
.num_mults
)
1875 whole_part
= ceil_whole
;
1876 frac_part
= ceil_fract
;
1877 synth_info
.deepest
= alt_synth_info
.deepest
;
1878 synth_info
.num_mults
= alt_synth_info
.num_mults
;
1879 memcpy (synth_info
.factors
, alt_synth_info
.factors
,
1880 (max_depth
+ 1) * sizeof (bool));
1885 HOST_WIDE_INT n
= real_to_integer (&whole_part
);
1886 REAL_VALUE_TYPE cint
;
1887 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
1889 if (!real_identical (&whole_part
, &cint
))
1892 if (powi_cost (n
) + synth_info
.num_mults
> POWI_MAX_MULTS
)
1895 memset (cache
, 0, (max_depth
+ 1) * sizeof (tree
));
1897 tree integer_res
= n
== 0 ? build_real (type
, dconst1
) : arg0
;
1899 /* Calculate the integer part of the exponent. */
1902 integer_res
= gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
1911 real_to_decimal (string
, &exp_init
, sizeof (string
), 0, 1);
1912 fprintf (dump_file
, "synthesizing pow (x, %s) as:\n", string
);
1918 fprintf (dump_file
, "1.0 / (");
1919 dump_integer_part (dump_file
, "x", n
);
1921 fprintf (dump_file
, " * ");
1922 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1923 fprintf (dump_file
, ")");
1927 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1928 fprintf (dump_file
, " / (");
1929 dump_integer_part (dump_file
, "x", n
);
1930 fprintf (dump_file
, ")");
1935 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1937 fprintf (dump_file
, " * ");
1938 dump_integer_part (dump_file
, "x", n
);
1941 fprintf (dump_file
, "\ndeepest sqrt chain: %d\n", synth_info
.deepest
);
1945 tree fract_res
= NULL_TREE
;
1948 /* Calculate the fractional part of the exponent. */
1949 for (unsigned i
= 0; i
< synth_info
.deepest
; i
++)
1951 if (synth_info
.factors
[i
])
1953 tree sqrt_chain
= get_fn_chain (arg0
, i
+ 1, gsi
, sqrtfn
, loc
, cache
);
1956 fract_res
= sqrt_chain
;
1959 fract_res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1960 fract_res
, sqrt_chain
);
1964 tree res
= NULL_TREE
;
1971 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1972 fract_res
, integer_res
);
1976 res
= build_and_insert_binop (gsi
, loc
, "powrootrecip", RDIV_EXPR
,
1977 build_real (type
, dconst1
), res
);
1981 res
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
1982 fract_res
, integer_res
);
1986 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1987 fract_res
, integer_res
);
1991 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
1992 with location info LOC. If possible, create an equivalent and
1993 less expensive sequence of statements prior to GSI, and return an
1994 expession holding the result. */
1997 gimple_expand_builtin_pow (gimple_stmt_iterator
*gsi
, location_t loc
,
1998 tree arg0
, tree arg1
)
2000 REAL_VALUE_TYPE c
, cint
, dconst1_3
, dconst1_4
, dconst1_6
;
2001 REAL_VALUE_TYPE c2
, dconst3
;
2003 tree type
, sqrtfn
, cbrtfn
, sqrt_arg0
, result
, cbrt_x
, powi_cbrt_x
;
2005 bool speed_p
= optimize_bb_for_speed_p (gsi_bb (*gsi
));
2006 bool hw_sqrt_exists
, c_is_int
, c2_is_int
;
2008 dconst1_4
= dconst1
;
2009 SET_REAL_EXP (&dconst1_4
, REAL_EXP (&dconst1_4
) - 2);
2011 /* If the exponent isn't a constant, there's nothing of interest
2013 if (TREE_CODE (arg1
) != REAL_CST
)
2016 /* Don't perform the operation if flag_signaling_nans is on
2017 and the operand is a signaling NaN. */
2018 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1
)))
2019 && ((TREE_CODE (arg0
) == REAL_CST
2020 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0
)))
2021 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1
))))
2024 /* If the exponent is equivalent to an integer, expand to an optimal
2025 multiplication sequence when profitable. */
2026 c
= TREE_REAL_CST (arg1
);
2027 n
= real_to_integer (&c
);
2028 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2029 c_is_int
= real_identical (&c
, &cint
);
2032 && ((n
>= -1 && n
<= 2)
2033 || (flag_unsafe_math_optimizations
2035 && powi_cost (n
) <= POWI_MAX_MULTS
)))
2036 return gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
2038 /* Attempt various optimizations using sqrt and cbrt. */
2039 type
= TREE_TYPE (arg0
);
2040 mode
= TYPE_MODE (type
);
2041 sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
2043 /* Optimize pow(x,0.5) = sqrt(x). This replacement is always safe
2044 unless signed zeros must be maintained. pow(-0,0.5) = +0, while
2047 && real_equal (&c
, &dconsthalf
)
2048 && !HONOR_SIGNED_ZEROS (mode
))
2049 return build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2051 hw_sqrt_exists
= optab_handler (sqrt_optab
, mode
) != CODE_FOR_nothing
;
2053 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2054 optimizations since 1./3. is not exactly representable. If x
2055 is negative and finite, the correct value of pow(x,1./3.) is
2056 a NaN with the "invalid" exception raised, because the value
2057 of 1./3. actually has an even denominator. The correct value
2058 of cbrt(x) is a negative real value. */
2059 cbrtfn
= mathfn_built_in (type
, BUILT_IN_CBRT
);
2060 dconst1_3
= real_value_truncate (mode
, dconst_third ());
2062 if (flag_unsafe_math_optimizations
2064 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2065 && real_equal (&c
, &dconst1_3
))
2066 return build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2068 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2069 if we don't have a hardware sqrt insn. */
2070 dconst1_6
= dconst1_3
;
2071 SET_REAL_EXP (&dconst1_6
, REAL_EXP (&dconst1_6
) - 1);
2073 if (flag_unsafe_math_optimizations
2076 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2079 && real_equal (&c
, &dconst1_6
))
2082 sqrt_arg0
= build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2085 return build_and_insert_call (gsi
, loc
, cbrtfn
, sqrt_arg0
);
/* Attempt to expand the POW as a product of square root chains.
   Expand the 0.25 case even when optimising for size.  */
2091 if (flag_unsafe_math_optimizations
2094 && (speed_p
|| real_equal (&c
, &dconst1_4
))
2095 && !HONOR_SIGNED_ZEROS (mode
))
2097 unsigned int max_depth
= speed_p
2098 ? param_max_pow_sqrt_depth
2101 tree expand_with_sqrts
2102 = expand_pow_as_sqrts (gsi
, loc
, arg0
, arg1
, max_depth
);
2104 if (expand_with_sqrts
)
2105 return expand_with_sqrts
;
2108 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst2
);
2109 n
= real_to_integer (&c2
);
2110 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2111 c2_is_int
= real_identical (&c2
, &cint
);
2113 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2115 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2116 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2118 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2119 different from pow(x, 1./3.) due to rounding and behavior with
2120 negative x, we need to constrain this transformation to unsafe
2121 math and positive x or finite math. */
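  /* Worked example of the transformation above (illustrative only): for
     c = 5./3. we get n = 5, so

       pow (x, 5./3.)  ->  powi (x, 1) * powi (cbrt (x), 2)
			=  x * cbrt (x) * cbrt (x)

     and for c = -5./3. the resulting product is additionally
     reciprocated.  */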
2122 real_from_integer (&dconst3
, VOIDmode
, 3, SIGNED
);
2123 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst3
);
2124 real_round (&c2
, mode
, &c2
);
2125 n
= real_to_integer (&c2
);
2126 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2127 real_arithmetic (&c2
, RDIV_EXPR
, &cint
, &dconst3
);
2128 real_convert (&c2
, mode
, &c2
);
2130 if (flag_unsafe_math_optimizations
2132 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2133 && real_identical (&c2
, &c
)
2135 && optimize_function_for_speed_p (cfun
)
2136 && powi_cost (n
/ 3) <= POWI_MAX_MULTS
)
2138 tree powi_x_ndiv3
= NULL_TREE
;
2140 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2141 possible or profitable, give up. Skip the degenerate case when
2142 abs(n) < 3, where the result is always 1. */
2143 if (absu_hwi (n
) >= 3)
2145 powi_x_ndiv3
= gimple_expand_builtin_powi (gsi
, loc
, arg0
,
2151 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2152 as that creates an unnecessary variable. Instead, just produce
2153 either cbrt(x) or cbrt(x) * cbrt(x). */
2154 cbrt_x
= build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2156 if (absu_hwi (n
) % 3 == 1)
2157 powi_cbrt_x
= cbrt_x
;
2159 powi_cbrt_x
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2162 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2163 if (absu_hwi (n
) < 3)
2164 result
= powi_cbrt_x
;
2166 result
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2167 powi_x_ndiv3
, powi_cbrt_x
);
2169 /* If n is negative, reciprocate the result. */
2171 result
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
2172 build_real (type
, dconst1
), result
);
2177 /* No optimizations succeeded. */
2181 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2182 on the SSA_NAME argument of each of them. */
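/* Illustrative sketch, not part of the pass (the helper name below is made
   up): the input pattern the pass looks for.  Both calls share the same
   SSA_NAME argument, so execute_cse_sincos_1 rewrites them to be fed from a
   single combined computation (cexpi or sincos, depending on which of them
   the target's libc provides).  */

static void
sin_cos_pair_sketch (double x, double *s, double *c)
{
  *s = __builtin_sin (x);   /* these two calls ...  */
  *c = __builtin_cos (x);   /* ... get CSEd into one sincos/cexpi  */
}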
const pass_data pass_data_cse_sincos =
{
  GIMPLE_PASS, /* type */
  "sincos", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SINCOS, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
2199 class pass_cse_sincos
: public gimple_opt_pass
2202 pass_cse_sincos (gcc::context
*ctxt
)
2203 : gimple_opt_pass (pass_data_cse_sincos
, ctxt
)
2206 /* opt_pass methods: */
2207 bool gate (function
*) final override
2212 unsigned int execute (function
*) final override
;
2214 }; // class pass_cse_sincos
2217 pass_cse_sincos::execute (function
*fun
)
2220 bool cfg_changed
= false;
2222 calculate_dominance_info (CDI_DOMINATORS
);
2223 memset (&sincos_stats
, 0, sizeof (sincos_stats
));
2225 FOR_EACH_BB_FN (bb
, fun
)
2227 gimple_stmt_iterator gsi
;
2229 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2231 gimple
*stmt
= gsi_stmt (gsi
);
2233 if (is_gimple_call (stmt
)
2234 && gimple_call_lhs (stmt
))
2237 switch (gimple_call_combined_fn (stmt
))
2242 arg
= gimple_call_arg (stmt
, 0);
2243 /* Make sure we have either sincos or cexp. */
2244 if (!targetm
.libc_has_function (function_c99_math_complex
,
2246 && !targetm
.libc_has_function (function_sincos
,
2250 if (TREE_CODE (arg
) == SSA_NAME
)
2251 cfg_changed
|= execute_cse_sincos_1 (arg
);
2260 statistics_counter_event (fun
, "sincos statements inserted",
2261 sincos_stats
.inserted
);
2262 statistics_counter_event (fun
, "conv statements removed",
2263 sincos_stats
.conv_removed
);
2265 return cfg_changed
? TODO_cleanup_cfg
: 0;
2271 make_pass_cse_sincos (gcc::context
*ctxt
)
2273 return new pass_cse_sincos (ctxt
);
/* Expand powi(x,n) into an optimal number of multiplies, when n is a
   constant.  */
const pass_data pass_data_expand_pow =
{
  GIMPLE_PASS, /* type */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_POW, /* tv_id */
  PROP_ssa, /* properties_required */
  PROP_gimple_opt_math, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
2293 class pass_expand_pow
: public gimple_opt_pass
2296 pass_expand_pow (gcc::context
*ctxt
)
2297 : gimple_opt_pass (pass_data_expand_pow
, ctxt
)
2300 /* opt_pass methods: */
2301 bool gate (function
*) final override
2306 unsigned int execute (function
*) final override
;
2308 }; // class pass_expand_pow
2311 pass_expand_pow::execute (function
*fun
)
2314 bool cfg_changed
= false;
2316 calculate_dominance_info (CDI_DOMINATORS
);
2318 FOR_EACH_BB_FN (bb
, fun
)
2320 gimple_stmt_iterator gsi
;
2321 bool cleanup_eh
= false;
2323 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2325 gimple
*stmt
= gsi_stmt (gsi
);
2327 /* Only the last stmt in a bb could throw, no need to call
2328 gimple_purge_dead_eh_edges if we change something in the middle
2329 of a basic block. */
2332 if (is_gimple_call (stmt
)
2333 && gimple_call_lhs (stmt
))
2335 tree arg0
, arg1
, result
;
2339 switch (gimple_call_combined_fn (stmt
))
2342 arg0
= gimple_call_arg (stmt
, 0);
2343 arg1
= gimple_call_arg (stmt
, 1);
2345 loc
= gimple_location (stmt
);
2346 result
= gimple_expand_builtin_pow (&gsi
, loc
, arg0
, arg1
);
2350 tree lhs
= gimple_get_lhs (stmt
);
2351 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2352 gimple_set_location (new_stmt
, loc
);
2353 unlink_stmt_vdef (stmt
);
2354 gsi_replace (&gsi
, new_stmt
, true);
2356 if (gimple_vdef (stmt
))
2357 release_ssa_name (gimple_vdef (stmt
));
2362 arg0
= gimple_call_arg (stmt
, 0);
2363 arg1
= gimple_call_arg (stmt
, 1);
2364 loc
= gimple_location (stmt
);
2366 if (real_minus_onep (arg0
))
2368 tree t0
, t1
, cond
, one
, minus_one
;
2371 t0
= TREE_TYPE (arg0
);
2372 t1
= TREE_TYPE (arg1
);
2373 one
= build_real (t0
, dconst1
);
2374 minus_one
= build_real (t0
, dconstm1
);
2376 cond
= make_temp_ssa_name (t1
, NULL
, "powi_cond");
2377 stmt
= gimple_build_assign (cond
, BIT_AND_EXPR
,
2378 arg1
, build_int_cst (t1
, 1));
2379 gimple_set_location (stmt
, loc
);
2380 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2382 result
= make_temp_ssa_name (t0
, NULL
, "powi");
2383 stmt
= gimple_build_assign (result
, COND_EXPR
, cond
,
2385 gimple_set_location (stmt
, loc
);
2386 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2390 if (!tree_fits_shwi_p (arg1
))
2393 n
= tree_to_shwi (arg1
);
2394 result
= gimple_expand_builtin_powi (&gsi
, loc
, arg0
, n
);
2399 tree lhs
= gimple_get_lhs (stmt
);
2400 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2401 gimple_set_location (new_stmt
, loc
);
2402 unlink_stmt_vdef (stmt
);
2403 gsi_replace (&gsi
, new_stmt
, true);
2405 if (gimple_vdef (stmt
))
2406 release_ssa_name (gimple_vdef (stmt
));
2415 cfg_changed
|= gimple_purge_dead_eh_edges (bb
);
2418 return cfg_changed
? TODO_cleanup_cfg
: 0;
2424 make_pass_expand_pow (gcc::context
*ctxt
)
2426 return new pass_expand_pow (ctxt
);
2429 /* Return true if stmt is a type conversion operation that can be stripped
2430 when used in a widening multiply operation. */
2432 widening_mult_conversion_strippable_p (tree result_type
, gimple
*stmt
)
2434 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
2436 if (TREE_CODE (result_type
) == INTEGER_TYPE
)
2441 if (!CONVERT_EXPR_CODE_P (rhs_code
))
2444 op_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2446 /* If the type of OP has the same precision as the result, then
2447 we can strip this conversion. The multiply operation will be
2448 selected to create the correct extension as a by-product. */
2449 if (TYPE_PRECISION (result_type
) == TYPE_PRECISION (op_type
))
2452 /* We can also strip a conversion if it preserves the signed-ness of
2453 the operation and doesn't narrow the range. */
2454 inner_op_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
2456 /* If the inner-most type is unsigned, then we can strip any
2457 intermediate widening operation. If it's signed, then the
2458 intermediate widening operation must also be signed. */
2459 if ((TYPE_UNSIGNED (inner_op_type
)
2460 || TYPE_UNSIGNED (op_type
) == TYPE_UNSIGNED (inner_op_type
))
2461 && TYPE_PRECISION (op_type
) > TYPE_PRECISION (inner_op_type
))
2467 return rhs_code
== FIXED_CONVERT_EXPR
;
2470 /* Return true if RHS is a suitable operand for a widening multiplication,
2471 assuming a target type of TYPE.
2472 There are two cases:
2474 - RHS makes some value at least twice as wide. Store that value
2475 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2477 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2478 but leave *TYPE_OUT untouched. */
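/* Illustrative sketch, not part of the pass (helper name made up): a typical
   widening-multiplication source form.  Each 32-bit operand is widened to
   64 bits before the multiply, so the conversions can be stripped and the
   product computed with a single WIDEN_MULT_EXPR on targets that provide a
   widening multiply.  */

static long long
widening_mult_sketch (int a, int b)
{
  return (long long) a * (long long) b;   /* WIDEN_MULT_EXPR <a, b>  */
}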
2481 is_widening_mult_rhs_p (tree type
, tree rhs
, tree
*type_out
,
2487 if (TREE_CODE (rhs
) == SSA_NAME
)
/* Use tree_nonzero_bits to see if this operand is zero-extended
   for unsigned widening multiplications or non-negative for
   signed widening multiplications.  */
2492 if (TREE_CODE (type
) == INTEGER_TYPE
2493 && (TYPE_PRECISION (type
) & 1) == 0
2494 && int_mode_for_size (TYPE_PRECISION (type
) / 2, 1).exists ())
2496 unsigned int prec
= TYPE_PRECISION (type
);
2497 unsigned int hprec
= prec
/ 2;
2498 wide_int bits
= wide_int::from (tree_nonzero_bits (rhs
), prec
,
2499 TYPE_SIGN (TREE_TYPE (rhs
)));
2500 if (TYPE_UNSIGNED (type
)
2501 && wi::bit_and (bits
, wi::mask (hprec
, true, prec
)) == 0)
2503 *type_out
= build_nonstandard_integer_type (hprec
, true);
2504 /* X & MODE_MASK can be simplified to (T)X. */
2505 stmt
= SSA_NAME_DEF_STMT (rhs
);
2506 if (is_gimple_assign (stmt
)
2507 && gimple_assign_rhs_code (stmt
) == BIT_AND_EXPR
2508 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == INTEGER_CST
2509 && wide_int::from (wi::to_wide (gimple_assign_rhs2 (stmt
)),
2510 prec
, TYPE_SIGN (TREE_TYPE (rhs
)))
2511 == wi::mask (hprec
, false, prec
))
2512 *new_rhs_out
= gimple_assign_rhs1 (stmt
);
2517 else if (!TYPE_UNSIGNED (type
)
2518 && wi::bit_and (bits
, wi::mask (hprec
- 1, true, prec
)) == 0)
2520 *type_out
= build_nonstandard_integer_type (hprec
, false);
2526 stmt
= SSA_NAME_DEF_STMT (rhs
);
2527 if (is_gimple_assign (stmt
))
2530 if (widening_mult_conversion_strippable_p (type
, stmt
))
2532 rhs1
= gimple_assign_rhs1 (stmt
);
2534 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2536 *new_rhs_out
= rhs1
;
2547 type1
= TREE_TYPE (rhs1
);
2549 if (TREE_CODE (type1
) != TREE_CODE (type
)
2550 || TYPE_PRECISION (type1
) * 2 > TYPE_PRECISION (type
))
2553 *new_rhs_out
= rhs1
;
2558 if (TREE_CODE (rhs
) == INTEGER_CST
)
2568 /* Return true if STMT performs a widening multiplication, assuming the
2569 output type is TYPE. If so, store the unwidened types of the operands
2570 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2571 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2572 and *TYPE2_OUT would give the operands of the multiplication. */
2575 is_widening_mult_p (gimple
*stmt
,
2576 tree
*type1_out
, tree
*rhs1_out
,
2577 tree
*type2_out
, tree
*rhs2_out
)
2579 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2581 if (TREE_CODE (type
) == INTEGER_TYPE
)
2583 if (TYPE_OVERFLOW_TRAPS (type
))
2586 else if (TREE_CODE (type
) != FIXED_POINT_TYPE
)
2589 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs1 (stmt
), type1_out
,
2593 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs2 (stmt
), type2_out
,
2597 if (*type1_out
== NULL
)
2599 if (*type2_out
== NULL
|| !int_fits_type_p (*rhs1_out
, *type2_out
))
2601 *type1_out
= *type2_out
;
2604 if (*type2_out
== NULL
)
2606 if (!int_fits_type_p (*rhs2_out
, *type1_out
))
2608 *type2_out
= *type1_out
;
2611 /* Ensure that the larger of the two operands comes first. */
2612 if (TYPE_PRECISION (*type1_out
) < TYPE_PRECISION (*type2_out
))
2614 std::swap (*type1_out
, *type2_out
);
2615 std::swap (*rhs1_out
, *rhs2_out
);
2621 /* Check to see if the CALL statement is an invocation of copysign
2622 with 1. being the first argument. */
2624 is_copysign_call_with_1 (gimple
*call
)
2626 gcall
*c
= dyn_cast
<gcall
*> (call
);
2630 enum combined_fn code
= gimple_call_combined_fn (c
);
2632 if (code
== CFN_LAST
)
2635 if (builtin_fn_p (code
))
2637 switch (as_builtin_fn (code
))
2639 CASE_FLT_FN (BUILT_IN_COPYSIGN
):
2640 CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN
):
2641 return real_onep (gimple_call_arg (c
, 0));
2647 if (internal_fn_p (code
))
2649 switch (as_internal_fn (code
))
2652 return real_onep (gimple_call_arg (c
, 0));
/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
   This only happens when the xorsign optab is defined.  If the
   pattern is not an xorsign pattern or if expansion fails, FALSE is
   returned; otherwise TRUE is returned.  */
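/* Illustrative sketch, not part of the pass (helper name made up): the source
   form that is rewritten.  Multiplying by copysign (1.0, y) only transfers
   the sign bit of y onto x, which the xorsign expansion can do with a plain
   XOR of the sign bit, assuming signaling NaNs need not be honored.  */

static double
mult_copysign_sketch (double x, double y)
{
  return x * __builtin_copysign (1.0, y);   /* becomes .XORSIGN (x, y)  */
}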
2666 convert_expand_mult_copysign (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2668 tree treeop0
, treeop1
, lhs
, type
;
2669 location_t loc
= gimple_location (stmt
);
2670 lhs
= gimple_assign_lhs (stmt
);
2671 treeop0
= gimple_assign_rhs1 (stmt
);
2672 treeop1
= gimple_assign_rhs2 (stmt
);
2673 type
= TREE_TYPE (lhs
);
2674 machine_mode mode
= TYPE_MODE (type
);
2676 if (HONOR_SNANS (type
))
2679 if (TREE_CODE (treeop0
) == SSA_NAME
&& TREE_CODE (treeop1
) == SSA_NAME
)
2681 gimple
*call0
= SSA_NAME_DEF_STMT (treeop0
);
2682 if (!has_single_use (treeop0
) || !is_copysign_call_with_1 (call0
))
2684 call0
= SSA_NAME_DEF_STMT (treeop1
);
2685 if (!has_single_use (treeop1
) || !is_copysign_call_with_1 (call0
))
2690 if (optab_handler (xorsign_optab
, mode
) == CODE_FOR_nothing
)
2693 gcall
*c
= as_a
<gcall
*> (call0
);
2694 treeop0
= gimple_call_arg (c
, 1);
2697 = gimple_build_call_internal (IFN_XORSIGN
, 2, treeop1
, treeop0
);
2698 gimple_set_lhs (call_stmt
, lhs
);
2699 gimple_set_location (call_stmt
, loc
);
2700 gsi_replace (gsi
, call_stmt
, true);
2707 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2708 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2709 value is true iff we converted the statement. */
2712 convert_mult_to_widen (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2714 tree lhs
, rhs1
, rhs2
, type
, type1
, type2
;
2715 enum insn_code handler
;
2716 scalar_int_mode to_mode
, from_mode
, actual_mode
;
2718 int actual_precision
;
2719 location_t loc
= gimple_location (stmt
);
2720 bool from_unsigned1
, from_unsigned2
;
2722 lhs
= gimple_assign_lhs (stmt
);
2723 type
= TREE_TYPE (lhs
);
2724 if (TREE_CODE (type
) != INTEGER_TYPE
)
2727 if (!is_widening_mult_p (stmt
, &type1
, &rhs1
, &type2
, &rhs2
))
/* If either of rhs1 and rhs2 is subject to abnormal coalescing,
   avoid the transform.  */
2732 if ((TREE_CODE (rhs1
) == SSA_NAME
2733 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1
))
2734 || (TREE_CODE (rhs2
) == SSA_NAME
2735 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2
)))
2738 to_mode
= SCALAR_INT_TYPE_MODE (type
);
2739 from_mode
= SCALAR_INT_TYPE_MODE (type1
);
2740 if (to_mode
== from_mode
)
2743 from_unsigned1
= TYPE_UNSIGNED (type1
);
2744 from_unsigned2
= TYPE_UNSIGNED (type2
);
2746 if (from_unsigned1
&& from_unsigned2
)
2747 op
= umul_widen_optab
;
2748 else if (!from_unsigned1
&& !from_unsigned2
)
2749 op
= smul_widen_optab
;
2751 op
= usmul_widen_optab
;
2753 handler
= find_widening_optab_handler_and_mode (op
, to_mode
, from_mode
,
2756 if (handler
== CODE_FOR_nothing
)
2758 if (op
!= smul_widen_optab
)
2760 /* We can use a signed multiply with unsigned types as long as
2761 there is a wider mode to use, or it is the smaller of the two
2762 types that is unsigned. Note that type1 >= type2, always. */
2763 if ((TYPE_UNSIGNED (type1
)
2764 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2765 || (TYPE_UNSIGNED (type2
)
2766 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2768 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2769 || GET_MODE_SIZE (to_mode
) <= GET_MODE_SIZE (from_mode
))
2773 op
= smul_widen_optab
;
2774 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2778 if (handler
== CODE_FOR_nothing
)
2781 from_unsigned1
= from_unsigned2
= false;
2785 /* Expand can synthesize smul_widen_optab if the target
2786 supports umul_widen_optab. */
2787 op
= umul_widen_optab
;
2788 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2791 if (handler
== CODE_FOR_nothing
)
/* Ensure that the inputs to the handler are in the correct precision
   for the opcode.  This will be the full mode size.  */
2798 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2799 if (2 * actual_precision
> TYPE_PRECISION (type
))
2801 if (actual_precision
!= TYPE_PRECISION (type1
)
2802 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2803 type1
= build_nonstandard_integer_type (actual_precision
, from_unsigned1
);
2804 if (!useless_type_conversion_p (type1
, TREE_TYPE (rhs1
)))
2806 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2807 rhs1
= fold_convert (type1
, rhs1
);
2809 rhs1
= build_and_insert_cast (gsi
, loc
, type1
, rhs1
);
2811 if (actual_precision
!= TYPE_PRECISION (type2
)
2812 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2813 type2
= build_nonstandard_integer_type (actual_precision
, from_unsigned2
);
2814 if (!useless_type_conversion_p (type2
, TREE_TYPE (rhs2
)))
2816 if (TREE_CODE (rhs2
) == INTEGER_CST
)
2817 rhs2
= fold_convert (type2
, rhs2
);
2819 rhs2
= build_and_insert_cast (gsi
, loc
, type2
, rhs2
);
2822 gimple_assign_set_rhs1 (stmt
, rhs1
);
2823 gimple_assign_set_rhs2 (stmt
, rhs2
);
2824 gimple_assign_set_rhs_code (stmt
, WIDEN_MULT_EXPR
);
2826 widen_mul_stats
.widen_mults_inserted
++;
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */
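/* Illustrative sketch, not part of the pass (helper name made up): a
   source-level widening multiply-accumulate.  The widening multiply feeding
   the addition below matches WIDEN_MULT_PLUS_EXPR when the target has a
   suitable multiply-and-accumulate instruction.  */

static long long
widening_madd_sketch (long long acc, int a, int b)
{
  return acc + (long long) a * (long long) b;   /* WIDEN_MULT_PLUS_EXPR  */
}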
2837 convert_plusminus_to_widen (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
2838 enum tree_code code
)
2840 gimple
*rhs1_stmt
= NULL
, *rhs2_stmt
= NULL
;
2841 gimple
*conv1_stmt
= NULL
, *conv2_stmt
= NULL
, *conv_stmt
;
2842 tree type
, type1
, type2
, optype
;
2843 tree lhs
, rhs1
, rhs2
, mult_rhs1
, mult_rhs2
, add_rhs
;
2844 enum tree_code rhs1_code
= ERROR_MARK
, rhs2_code
= ERROR_MARK
;
2846 enum tree_code wmult_code
;
2847 enum insn_code handler
;
2848 scalar_mode to_mode
, from_mode
, actual_mode
;
2849 location_t loc
= gimple_location (stmt
);
2850 int actual_precision
;
2851 bool from_unsigned1
, from_unsigned2
;
2853 lhs
= gimple_assign_lhs (stmt
);
2854 type
= TREE_TYPE (lhs
);
2855 if ((TREE_CODE (type
) != INTEGER_TYPE
2856 && TREE_CODE (type
) != FIXED_POINT_TYPE
)
2857 || !type_has_mode_precision_p (type
))
2860 if (code
== MINUS_EXPR
)
2861 wmult_code
= WIDEN_MULT_MINUS_EXPR
;
2863 wmult_code
= WIDEN_MULT_PLUS_EXPR
;
2865 rhs1
= gimple_assign_rhs1 (stmt
);
2866 rhs2
= gimple_assign_rhs2 (stmt
);
2868 if (TREE_CODE (rhs1
) == SSA_NAME
)
2870 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2871 if (is_gimple_assign (rhs1_stmt
))
2872 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2875 if (TREE_CODE (rhs2
) == SSA_NAME
)
2877 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2878 if (is_gimple_assign (rhs2_stmt
))
2879 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
/* Allow for one conversion statement between the multiply
   and addition/subtraction statement.  If there is more than
   one conversion then we assume they would invalidate this
   transformation.  If that's not the case then they should have
   been folded before now.  */
2887 if (CONVERT_EXPR_CODE_P (rhs1_code
))
2889 conv1_stmt
= rhs1_stmt
;
2890 rhs1
= gimple_assign_rhs1 (rhs1_stmt
);
2891 if (TREE_CODE (rhs1
) == SSA_NAME
)
2893 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2894 if (is_gimple_assign (rhs1_stmt
))
2895 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2900 if (CONVERT_EXPR_CODE_P (rhs2_code
))
2902 conv2_stmt
= rhs2_stmt
;
2903 rhs2
= gimple_assign_rhs1 (rhs2_stmt
);
2904 if (TREE_CODE (rhs2
) == SSA_NAME
)
2906 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2907 if (is_gimple_assign (rhs2_stmt
))
2908 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
/* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
   is_widening_mult_p, but we still need the rhs returns.

   It might also appear that it would be sufficient to use the existing
   operands of the widening multiply, but that would limit the choice of
   multiply-and-accumulate instructions.

   If the widened-multiplication result has more than one use, it is
   probably wiser not to do the conversion.  Also restrict this operation
   to a single basic block to avoid moving the multiply to a different block
   with a higher execution frequency.  */
2925 if (code
== PLUS_EXPR
2926 && (rhs1_code
== MULT_EXPR
|| rhs1_code
== WIDEN_MULT_EXPR
))
2928 if (!has_single_use (rhs1
)
2929 || gimple_bb (rhs1_stmt
) != gimple_bb (stmt
)
2930 || !is_widening_mult_p (rhs1_stmt
, &type1
, &mult_rhs1
,
2931 &type2
, &mult_rhs2
))
2934 conv_stmt
= conv1_stmt
;
2936 else if (rhs2_code
== MULT_EXPR
|| rhs2_code
== WIDEN_MULT_EXPR
)
2938 if (!has_single_use (rhs2
)
2939 || gimple_bb (rhs2_stmt
) != gimple_bb (stmt
)
2940 || !is_widening_mult_p (rhs2_stmt
, &type1
, &mult_rhs1
,
2941 &type2
, &mult_rhs2
))
2944 conv_stmt
= conv2_stmt
;
2949 to_mode
= SCALAR_TYPE_MODE (type
);
2950 from_mode
= SCALAR_TYPE_MODE (type1
);
2951 if (to_mode
== from_mode
)
2954 from_unsigned1
= TYPE_UNSIGNED (type1
);
2955 from_unsigned2
= TYPE_UNSIGNED (type2
);
2958 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2959 if (from_unsigned1
!= from_unsigned2
)
2961 if (!INTEGRAL_TYPE_P (type
))
2963 /* We can use a signed multiply with unsigned types as long as
2964 there is a wider mode to use, or it is the smaller of the two
2965 types that is unsigned. Note that type1 >= type2, always. */
2967 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2969 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2971 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2972 || GET_MODE_SIZE (from_mode
) >= GET_MODE_SIZE (to_mode
))
2976 from_unsigned1
= from_unsigned2
= false;
2977 optype
= build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode
),
/* If there was a conversion between the multiply and addition
   then we need to make sure it fits a multiply-and-accumulate.
   This should be a single mode change which does not change the
   value.  */
2987 /* We use the original, unmodified data types for this. */
2988 tree from_type
= TREE_TYPE (gimple_assign_rhs1 (conv_stmt
));
2989 tree to_type
= TREE_TYPE (gimple_assign_lhs (conv_stmt
));
2990 int data_size
= TYPE_PRECISION (type1
) + TYPE_PRECISION (type2
);
2991 bool is_unsigned
= TYPE_UNSIGNED (type1
) && TYPE_UNSIGNED (type2
);
2993 if (TYPE_PRECISION (from_type
) > TYPE_PRECISION (to_type
))
2995 /* Conversion is a truncate. */
2996 if (TYPE_PRECISION (to_type
) < data_size
)
2999 else if (TYPE_PRECISION (from_type
) < TYPE_PRECISION (to_type
))
3001 /* Conversion is an extend. Check it's the right sort. */
3002 if (TYPE_UNSIGNED (from_type
) != is_unsigned
3003 && !(is_unsigned
&& TYPE_PRECISION (from_type
) > data_size
))
3006 /* else convert is a no-op for our purposes. */
3009 /* Verify that the machine can perform a widening multiply
3010 accumulate in this mode/signedness combination, otherwise
3011 this transformation is likely to pessimize code. */
3012 this_optab
= optab_for_tree_code (wmult_code
, optype
, optab_default
);
3013 handler
= find_widening_optab_handler_and_mode (this_optab
, to_mode
,
3014 from_mode
, &actual_mode
);
3016 if (handler
== CODE_FOR_nothing
)
/* Ensure that the inputs to the handler are in the correct precision
   for the opcode.  This will be the full mode size.  */
3021 actual_precision
= GET_MODE_PRECISION (actual_mode
);
3022 if (actual_precision
!= TYPE_PRECISION (type1
)
3023 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
3024 type1
= build_nonstandard_integer_type (actual_precision
, from_unsigned1
);
3025 if (!useless_type_conversion_p (type1
, TREE_TYPE (mult_rhs1
)))
3027 if (TREE_CODE (mult_rhs1
) == INTEGER_CST
)
3028 mult_rhs1
= fold_convert (type1
, mult_rhs1
);
3030 mult_rhs1
= build_and_insert_cast (gsi
, loc
, type1
, mult_rhs1
);
3032 if (actual_precision
!= TYPE_PRECISION (type2
)
3033 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
3034 type2
= build_nonstandard_integer_type (actual_precision
, from_unsigned2
);
3035 if (!useless_type_conversion_p (type2
, TREE_TYPE (mult_rhs2
)))
3037 if (TREE_CODE (mult_rhs2
) == INTEGER_CST
)
3038 mult_rhs2
= fold_convert (type2
, mult_rhs2
);
3040 mult_rhs2
= build_and_insert_cast (gsi
, loc
, type2
, mult_rhs2
);
3043 if (!useless_type_conversion_p (type
, TREE_TYPE (add_rhs
)))
3044 add_rhs
= build_and_insert_cast (gsi
, loc
, type
, add_rhs
);
3046 gimple_assign_set_rhs_with_ops (gsi
, wmult_code
, mult_rhs1
, mult_rhs2
,
3048 update_stmt (gsi_stmt (*gsi
));
3049 widen_mul_stats
.maccs_inserted
++;
3053 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3054 OP2 and which we know is used in statements that can be, together with the
3055 multiplication, converted to FMAs, perform the transformation. */
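/* Illustrative sketch, not part of the pass (helper name made up): with
   -ffp-contract=fast and a target that supports IFN_FMA for the type, the
   separate multiply and add below are fused into one .FMA call; the negated
   variants become FMS, FNMA or FNMS.  */

static double
fma_candidate_sketch (double a, double b, double c)
{
  double t = a * b;
  return t + c;   /* becomes .FMA (a, b, c)  */
}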
3058 convert_mult_to_fma_1 (tree mul_result
, tree op1
, tree op2
)
3060 tree type
= TREE_TYPE (mul_result
);
3062 imm_use_iterator imm_iter
;
3065 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, mul_result
)
3067 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
3068 tree addop
, mulop1
= op1
, result
= mul_result
;
3069 bool negate_p
= false;
3070 gimple_seq seq
= NULL
;
3072 if (is_gimple_debug (use_stmt
))
3075 if (is_gimple_assign (use_stmt
)
3076 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3078 result
= gimple_assign_lhs (use_stmt
);
3079 use_operand_p use_p
;
3080 gimple
*neguse_stmt
;
3081 single_imm_use (gimple_assign_lhs (use_stmt
), &use_p
, &neguse_stmt
);
3082 gsi_remove (&gsi
, true);
3083 release_defs (use_stmt
);
3085 use_stmt
= neguse_stmt
;
3086 gsi
= gsi_for_stmt (use_stmt
);
3090 tree cond
, else_value
, ops
[3], len
, bias
;
3092 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
,
3096 addop
= ops
[0] == result
? ops
[1] : ops
[0];
3098 if (code
== MINUS_EXPR
)
3100 if (ops
[0] == result
)
3101 /* a * b - c -> a * b + (-c) */
3102 addop
= gimple_build (&seq
, NEGATE_EXPR
, type
, addop
);
3104 /* a - b * c -> (-b) * c + a */
3105 negate_p
= !negate_p
;
3109 mulop1
= gimple_build (&seq
, NEGATE_EXPR
, type
, mulop1
);
3112 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
3116 = gimple_build_call_internal (IFN_COND_LEN_FMA
, 7, cond
, mulop1
, op2
,
3117 addop
, else_value
, len
, bias
);
3119 fma_stmt
= gimple_build_call_internal (IFN_COND_FMA
, 5, cond
, mulop1
,
3120 op2
, addop
, else_value
);
3122 fma_stmt
= gimple_build_call_internal (IFN_FMA
, 3, mulop1
, op2
, addop
);
3123 gimple_set_lhs (fma_stmt
, gimple_get_lhs (use_stmt
));
3124 gimple_call_set_nothrow (fma_stmt
, !stmt_can_throw_internal (cfun
,
3126 gsi_replace (&gsi
, fma_stmt
, true);
3127 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3128 regardless of where the negation occurs. */
3129 gimple
*orig_stmt
= gsi_stmt (gsi
);
3130 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3132 if (maybe_clean_or_replace_eh_stmt (orig_stmt
, gsi_stmt (gsi
)))
3134 update_stmt (gsi_stmt (gsi
));
3137 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3139 fprintf (dump_file
, "Generated FMA ");
3140 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3141 fprintf (dump_file
, "\n");
3144 /* If the FMA result is negated in a single use, fold the negation
3146 orig_stmt
= gsi_stmt (gsi
);
3147 use_operand_p use_p
;
3149 if (is_gimple_call (orig_stmt
)
3150 && gimple_call_internal_p (orig_stmt
)
3151 && gimple_call_lhs (orig_stmt
)
3152 && TREE_CODE (gimple_call_lhs (orig_stmt
)) == SSA_NAME
3153 && single_imm_use (gimple_call_lhs (orig_stmt
), &use_p
, &neg_stmt
)
3154 && is_gimple_assign (neg_stmt
)
3155 && gimple_assign_rhs_code (neg_stmt
) == NEGATE_EXPR
3156 && !stmt_could_throw_p (cfun
, neg_stmt
))
3158 gsi
= gsi_for_stmt (neg_stmt
);
3159 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3161 if (maybe_clean_or_replace_eh_stmt (neg_stmt
, gsi_stmt (gsi
)))
3163 update_stmt (gsi_stmt (gsi
));
3164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3166 fprintf (dump_file
, "Folded FMA negation ");
3167 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3168 fprintf (dump_file
, "\n");
3173 widen_mul_stats
.fmas_inserted
++;
/* Data necessary to perform the actual transformation from a multiplication
   and an addition to an FMA after the decision that it should be done has
   been taken, and to then delete the multiplication statement from the
   function IL.  */
3181 struct fma_transformation_info
3189 /* Structure containing the current state of FMA deferring, i.e. whether we are
3190 deferring, whether to continue deferring, and all data necessary to come
3191 back and perform all deferred transformations. */
3193 class fma_deferring_state
3196 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3197 do any deferring. */
3199 fma_deferring_state (bool perform_deferring
)
3200 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL
),
3201 m_last_result (NULL_TREE
), m_deferring_p (perform_deferring
) {}
/* List of FMA candidates for which the transformation has been determined
   possible but which, at this point in the BB analysis, we do not consider
   beneficial.  */
3206 auto_vec
<fma_transformation_info
, 8> m_candidates
;
3208 /* Set of results of multiplication that are part of an already deferred FMA
3210 hash_set
<tree
> m_mul_result_set
;
3212 /* The PHI that supposedly feeds back result of a FMA to another over loop
3214 gphi
*m_initial_phi
;
3216 /* Result of the last produced FMA candidate or NULL if there has not been
3220 /* If true, deferring might still be profitable. If false, transform all
3221 candidates and no longer defer. */
3225 /* Transform all deferred FMA candidates and mark STATE as no longer
3229 cancel_fma_deferring (fma_deferring_state
*state
)
3231 if (!state
->m_deferring_p
)
3234 for (unsigned i
= 0; i
< state
->m_candidates
.length (); i
++)
3236 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3237 fprintf (dump_file
, "Generating deferred FMA\n");
3239 const fma_transformation_info
&fti
= state
->m_candidates
[i
];
3240 convert_mult_to_fma_1 (fti
.mul_result
, fti
.op1
, fti
.op2
);
3242 gimple_stmt_iterator gsi
= gsi_for_stmt (fti
.mul_stmt
);
3243 gsi_remove (&gsi
, true);
3244 release_defs (fti
.mul_stmt
);
3246 state
->m_deferring_p
= false;
3249 /* If OP is an SSA name defined by a PHI node, return the PHI statement.
3250 Otherwise return NULL. */
3253 result_of_phi (tree op
)
3255 if (TREE_CODE (op
) != SSA_NAME
)
3258 return dyn_cast
<gphi
*> (SSA_NAME_DEF_STMT (op
));
/* After processing statements of a BB and recording STATE, return true if the
   initial phi is fed by the last FMA candidate result or one such result from
   previously processed BBs marked in LAST_RESULT_SET.  */
3266 last_fma_candidate_feeds_initial_phi (fma_deferring_state
*state
,
3267 hash_set
<tree
> *last_result_set
)
3271 FOR_EACH_PHI_ARG (use
, state
->m_initial_phi
, iter
, SSA_OP_USE
)
3273 tree t
= USE_FROM_PTR (use
);
3274 if (t
== state
->m_last_result
3275 || last_result_set
->contains (t
))
3282 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3283 with uses in additions and subtractions to form fused multiply-add
3284 operations. Returns true if successful and MUL_STMT should be removed.
3285 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3286 on MUL_COND, otherwise it is unconditional.
3288 If STATE indicates that we are deferring FMA transformation, that means
3289 that we do not produce FMAs for basic blocks which look like:
3292 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3294 accumulator_66 = _65 + accumulator_111;
3296 or its unrolled version, i.e. with several FMA candidates that feed result
3297 of one into the addend of another. Instead, we add them to a list in STATE
3298 and if we later discover an FMA candidate that is not part of such a chain,
3299 we go back and perform all deferred past candidates. */
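/* Illustrative sketch, not part of the pass (helper name made up): the kind
   of reduction loop the deferring heuristic is about.  Each iteration's FMA
   result would feed the addend of the next one, so when the type is no wider
   than param_avoid_fma_max_bits such candidates are deferred and, if the
   whole block turns out to be one accumulation chain, may end up not being
   fused at all.  */

static double
dot_product_sketch (const double *a, const double *b, int n)
{
  double acc = 0.0;
  for (int i = 0; i < n; i++)
    acc = acc + a[i] * b[i];   /* FMA candidate feeding the next iteration  */
  return acc;
}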
3302 convert_mult_to_fma (gimple
*mul_stmt
, tree op1
, tree op2
,
3303 fma_deferring_state
*state
, tree mul_cond
= NULL_TREE
,
3304 tree mul_len
= NULL_TREE
, tree mul_bias
= NULL_TREE
)
3306 tree mul_result
= gimple_get_lhs (mul_stmt
);
3307 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3308 if the statement was left just for the side-effects. */
3311 tree type
= TREE_TYPE (mul_result
);
3312 gimple
*use_stmt
, *neguse_stmt
;
3313 use_operand_p use_p
;
3314 imm_use_iterator imm_iter
;
3316 if (FLOAT_TYPE_P (type
)
3317 && flag_fp_contract_mode
!= FP_CONTRACT_FAST
)
3320 /* We don't want to do bitfield reduction ops. */
3321 if (INTEGRAL_TYPE_P (type
)
3322 && (!type_has_mode_precision_p (type
) || TYPE_OVERFLOW_TRAPS (type
)))
3325 /* If the target doesn't support it, don't generate it. We assume that
3326 if fma isn't available then fms, fnma or fnms are not either. */
3327 optimization_type opt_type
= bb_optimization_type (gimple_bb (mul_stmt
));
3328 if (!direct_internal_fn_supported_p (IFN_FMA
, type
, opt_type
))
3331 /* If the multiplication has zero uses, it is kept around probably because
3332 of -fnon-call-exceptions. Don't optimize it away in that case,
3334 if (has_zero_uses (mul_result
))
3338 = (state
->m_deferring_p
3339 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type
)),
3340 param_avoid_fma_max_bits
));
3341 bool defer
= check_defer
;
3342 bool seen_negate_p
= false;
3344 /* There is no numerical difference between fused and unfused integer FMAs,
3345 and the assumption below that FMA is as cheap as addition is unlikely
3346 to be true, especially if the multiplication occurs multiple times on
3347 the same chain. E.g., for something like:
3349 (((a * b) + c) >> 1) + (a * b)
3351 we do not want to duplicate the a * b into two additions, not least
3352 because the result is not a natural FMA chain. */
3353 if (ANY_INTEGRAL_TYPE_P (type
)
3354 && !has_single_use (mul_result
))
3357 if (!dbg_cnt (form_fma
))
3360 /* Make sure that the multiplication statement becomes dead after
3361 the transformation, thus that all uses are transformed to FMAs.
3362 This means we assume that an FMA operation has the same cost
3364 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, mul_result
)
3366 tree result
= mul_result
;
3367 bool negate_p
= false;
3369 use_stmt
= USE_STMT (use_p
);
3371 if (is_gimple_debug (use_stmt
))
/* For now restrict this operation to single basic blocks.  In theory
3375 we would want to support sinking the multiplication in
3381 to form a fma in the then block and sink the multiplication to the
3383 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3386 /* A negate on the multiplication leads to FNMA. */
3387 if (is_gimple_assign (use_stmt
)
3388 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3393 /* If (due to earlier missed optimizations) we have two
3394 negates of the same value, treat them as equivalent
3395 to a single negate with multiple uses. */
3399 result
= gimple_assign_lhs (use_stmt
);
3401 /* Make sure the negate statement becomes dead with this
3402 single transformation. */
3403 if (!single_imm_use (gimple_assign_lhs (use_stmt
),
3404 &use_p
, &neguse_stmt
))
3407 /* Make sure the multiplication isn't also used on that stmt. */
3408 FOR_EACH_PHI_OR_STMT_USE (usep
, neguse_stmt
, iter
, SSA_OP_USE
)
3409 if (USE_FROM_PTR (usep
) == mul_result
)
3413 use_stmt
= neguse_stmt
;
3414 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3417 negate_p
= seen_negate_p
= true;
3420 tree cond
, else_value
, ops
[3], len
, bias
;
3422 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
, ops
,
3423 &else_value
, &len
, &bias
))
3429 if (ops
[1] == result
)
3430 negate_p
= !negate_p
;
3435 /* FMA can only be formed from PLUS and MINUS. */
/* For COND_LEN_* operations, we may have a dummy mask which is
   the all-true mask.  In that case mul_cond and cond may be
   different trees, but we still consider them equal.  */
3444 if (mul_cond
&& cond
!= mul_cond
3445 && !(integer_truep (mul_cond
) && integer_truep (cond
)))
3448 if (else_value
== result
)
3451 if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA
, type
,
3457 poly_int64 mul_value
, value
;
3458 if (poly_int_tree_p (mul_len
, &mul_value
)
3459 && poly_int_tree_p (len
, &value
)
3460 && maybe_ne (mul_value
, value
))
3462 else if (mul_len
!= len
)
3465 if (wi::to_widest (mul_bias
) != wi::to_widest (bias
))
3471 if (mul_cond
&& cond
!= mul_cond
)
3476 if (cond
== result
|| else_value
== result
)
3478 if (!direct_internal_fn_supported_p (IFN_COND_FMA
, type
,
3484 /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
3485 we'll visit later, we might be able to get a more profitable
3487 OTOH, if we don't, a negate / fma pair has likely lower latency
than a mult / subtract pair.  */
3489 if (code
== MINUS_EXPR
3492 && !direct_internal_fn_supported_p (IFN_FMS
, type
, opt_type
)
3493 && direct_internal_fn_supported_p (IFN_FNMA
, type
, opt_type
)
3494 && TREE_CODE (ops
[1]) == SSA_NAME
3495 && has_single_use (ops
[1]))
3497 gimple
*stmt2
= SSA_NAME_DEF_STMT (ops
[1]);
3498 if (is_gimple_assign (stmt2
)
3499 && gimple_assign_rhs_code (stmt2
) == MULT_EXPR
)
3503 /* We can't handle a * b + a * b. */
3504 if (ops
[0] == ops
[1])
3506 /* If deferring, make sure we are not looking at an instruction that
3507 wouldn't have existed if we were not. */
3508 if (state
->m_deferring_p
3509 && (state
->m_mul_result_set
.contains (ops
[0])
3510 || state
->m_mul_result_set
.contains (ops
[1])))
3515 tree use_lhs
= gimple_get_lhs (use_stmt
);
3516 if (state
->m_last_result
)
3518 if (ops
[1] == state
->m_last_result
3519 || ops
[0] == state
->m_last_result
)
3526 gcc_checking_assert (!state
->m_initial_phi
);
3528 if (ops
[0] == result
)
3529 phi
= result_of_phi (ops
[1]);
3532 gcc_assert (ops
[1] == result
);
3533 phi
= result_of_phi (ops
[0]);
3538 state
->m_initial_phi
= phi
;
3545 state
->m_last_result
= use_lhs
;
3546 check_defer
= false;
3551 /* While it is possible to validate whether or not the exact form that
3552 we've recognized is available in the backend, the assumption is that
3553 if the deferring logic above did not trigger, the transformation is
3554 never a loss. For instance, suppose the target only has the plain FMA
3555 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3556 MUL+SUB for FMA+NEG, which is still two operations. Consider
3557 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3558 form the two NEGs are independent and could be run in parallel. */
3563 fma_transformation_info fti
;
3564 fti
.mul_stmt
= mul_stmt
;
3565 fti
.mul_result
= mul_result
;
3568 state
->m_candidates
.safe_push (fti
);
3569 state
->m_mul_result_set
.add (mul_result
);
3571 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3573 fprintf (dump_file
, "Deferred generating FMA for multiplication ");
3574 print_gimple_stmt (dump_file
, mul_stmt
, 0, TDF_NONE
);
3575 fprintf (dump_file
, "\n");
3582 if (state
->m_deferring_p
)
3583 cancel_fma_deferring (state
);
3584 convert_mult_to_fma_1 (mul_result
, op1
, op2
);
3590 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3591 a check for non-zero like:
3592 _1 = x_4(D) * y_5(D);
3595 goto <bb 3>; [50.00%]
3597 goto <bb 4>; [50.00%]
3599 <bb 3> [local count: 536870913]:
3604 <bb 4> [local count: 1073741824]:
3605 # iftmp.0_3 = PHI <_10(3), 0(2)>
3606 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3607 optimize the x_4(D) != 0 condition to 1. */
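/* Illustrative sketch, not part of the pass (helper name made up): a source
   form that produces IL like the above.  The division-based overflow test is
   rewritten to use .MUL_OVERFLOW, and once that is done the guarding
   x != 0 check is redundant and can be folded to true.  */

static int
guarded_mul_overflow_sketch (unsigned x, unsigned y)
{
  unsigned r = x * y;
  return x != 0 && r / x != y;   /* overflow test; x != 0 becomes 1  */
}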
3610 maybe_optimize_guarding_check (vec
<gimple
*> &mul_stmts
, gimple
*cond_stmt
,
3611 gimple
*div_stmt
, bool *cfg_changed
)
3613 basic_block bb
= gimple_bb (cond_stmt
);
3614 if (gimple_bb (div_stmt
) != bb
|| !single_pred_p (bb
))
3616 edge pred_edge
= single_pred_edge (bb
);
3617 basic_block pred_bb
= pred_edge
->src
;
3618 if (EDGE_COUNT (pred_bb
->succs
) != 2)
3620 edge other_edge
= EDGE_SUCC (pred_bb
, EDGE_SUCC (pred_bb
, 0) == pred_edge
);
3621 edge other_succ_edge
= NULL
;
3622 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3624 if (EDGE_COUNT (bb
->succs
) != 2)
3626 other_succ_edge
= EDGE_SUCC (bb
, 0);
3627 if (gimple_cond_code (cond_stmt
) == NE_EXPR
)
3629 if (other_succ_edge
->flags
& EDGE_TRUE_VALUE
)
3630 other_succ_edge
= EDGE_SUCC (bb
, 1);
3632 else if (other_succ_edge
->flags
& EDGE_FALSE_VALUE
)
3633 other_succ_edge
= EDGE_SUCC (bb
, 0);
3634 if (other_edge
->dest
!= other_succ_edge
->dest
)
3637 else if (!single_succ_p (bb
) || other_edge
->dest
!= single_succ (bb
))
3639 gcond
*zero_cond
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (pred_bb
));
3640 if (zero_cond
== NULL
3641 || (gimple_cond_code (zero_cond
)
3642 != ((pred_edge
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
))
3643 || !integer_zerop (gimple_cond_rhs (zero_cond
)))
3645 tree zero_cond_lhs
= gimple_cond_lhs (zero_cond
);
3646 if (TREE_CODE (zero_cond_lhs
) != SSA_NAME
)
3648 if (gimple_assign_rhs2 (div_stmt
) != zero_cond_lhs
)
/* Allow the divisor to be the result of a same-precision cast
   from zero_cond_lhs.  */
3652 tree rhs2
= gimple_assign_rhs2 (div_stmt
);
3653 if (TREE_CODE (rhs2
) != SSA_NAME
)
3655 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3656 if (!gimple_assign_cast_p (g
)
3657 || gimple_assign_rhs1 (g
) != gimple_cond_lhs (zero_cond
)
3658 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs
))
3659 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs
))
3660 != TYPE_PRECISION (TREE_TYPE (rhs2
))))
3663 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
3664 mul_stmts
.quick_push (div_stmt
);
3665 if (is_gimple_debug (gsi_stmt (gsi
)))
3666 gsi_next_nondebug (&gsi
);
3667 unsigned cast_count
= 0;
3668 while (gsi_stmt (gsi
) != cond_stmt
)
/* If the original mul_stmt has a single use, allow it in the same bb;
   we are then looking just at __builtin_mul_overflow_p.
   Though, in that case the original mul_stmt will be replaced
   by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3677 FOR_EACH_VEC_ELT (mul_stmts
, i
, mul_stmt
)
3679 if (gsi_stmt (gsi
) == mul_stmt
)
3685 if (!ok
&& gimple_assign_cast_p (gsi_stmt (gsi
)) && ++cast_count
< 4)
3689 gsi_next_nondebug (&gsi
);
3691 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3693 basic_block succ_bb
= other_edge
->dest
;
3694 for (gphi_iterator gpi
= gsi_start_phis (succ_bb
); !gsi_end_p (gpi
);
3697 gphi
*phi
= gpi
.phi ();
3698 tree v1
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3699 tree v2
= gimple_phi_arg_def (phi
, other_succ_edge
->dest_idx
);
3700 if (!operand_equal_p (v1
, v2
, 0))
3706 tree lhs
= gimple_assign_lhs (cond_stmt
);
3707 if (!lhs
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))
3709 gsi_next_nondebug (&gsi
);
3710 if (!gsi_end_p (gsi
))
3712 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3714 gimple
*cast_stmt
= gsi_stmt (gsi
);
3715 if (!gimple_assign_cast_p (cast_stmt
))
3717 tree new_lhs
= gimple_assign_lhs (cast_stmt
);
3718 gsi_next_nondebug (&gsi
);
3719 if (!gsi_end_p (gsi
)
3721 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs
))
3722 || TYPE_PRECISION (TREE_TYPE (new_lhs
)) <= 1)
3726 edge succ_edge
= single_succ_edge (bb
);
3727 basic_block succ_bb
= succ_edge
->dest
;
3728 gsi
= gsi_start_phis (succ_bb
);
3729 if (gsi_end_p (gsi
))
3731 gphi
*phi
= as_a
<gphi
*> (gsi_stmt (gsi
));
3733 if (!gsi_end_p (gsi
))
3735 if (gimple_phi_arg_def (phi
, succ_edge
->dest_idx
) != lhs
)
3737 tree other_val
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3738 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3740 tree cond
= gimple_assign_rhs1 (cond_stmt
);
3741 if (TREE_CODE (cond
) == NE_EXPR
)
3743 if (!operand_equal_p (other_val
,
3744 gimple_assign_rhs3 (cond_stmt
), 0))
3747 else if (!operand_equal_p (other_val
,
3748 gimple_assign_rhs2 (cond_stmt
), 0))
3751 else if (gimple_assign_rhs_code (cond_stmt
) == NE_EXPR
)
3753 if (!integer_zerop (other_val
))
3756 else if (!integer_onep (other_val
))
3759 if (pred_edge
->flags
& EDGE_TRUE_VALUE
)
3760 gimple_cond_make_true (zero_cond
);
3762 gimple_cond_make_false (zero_cond
);
3763 update_stmt (zero_cond
);
3764 *cfg_changed
= true;
/* Helper function for arith_overflow_check_p.  Return true
   if VAL1 is equal to VAL2 cast to the corresponding integral type
   with the other signedness, or vice versa.  */
3772 arith_cast_equal_p (tree val1
, tree val2
)
3774 if (TREE_CODE (val1
) == INTEGER_CST
&& TREE_CODE (val2
) == INTEGER_CST
)
3775 return wi::eq_p (wi::to_wide (val1
), wi::to_wide (val2
));
3776 else if (TREE_CODE (val1
) != SSA_NAME
|| TREE_CODE (val2
) != SSA_NAME
)
3778 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1
))
3779 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1
)) == val2
)
3781 if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2
))
3782 && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2
)) == val1
)
/* Helper function of match_arith_overflow.  Return 1
   if USE_STMT is an unsigned overflow check ovf != 0 for
   STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0
3793 arith_overflow_check_p (gimple
*stmt
, gimple
*cast_stmt
, gimple
*&use_stmt
,
3794 tree maxval
, tree
*other
)
3796 enum tree_code ccode
= ERROR_MARK
;
3797 tree crhs1
= NULL_TREE
, crhs2
= NULL_TREE
;
3798 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3799 tree lhs
= gimple_assign_lhs (cast_stmt
? cast_stmt
: stmt
);
3800 tree rhs1
= gimple_assign_rhs1 (stmt
);
3801 tree rhs2
= gimple_assign_rhs2 (stmt
);
3802 tree multop
= NULL_TREE
, divlhs
= NULL_TREE
;
3803 gimple
*cur_use_stmt
= use_stmt
;
3805 if (code
== MULT_EXPR
)
3807 if (!is_gimple_assign (use_stmt
))
3809 if (gimple_assign_rhs_code (use_stmt
) != TRUNC_DIV_EXPR
)
3811 if (gimple_assign_rhs1 (use_stmt
) != lhs
)
3815 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs1
))
3817 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
))
3822 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
3824 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
, 0))
3828 if (stmt_ends_bb_p (use_stmt
))
3830 divlhs
= gimple_assign_lhs (use_stmt
);
3834 if (!single_imm_use (divlhs
, &use
, &cur_use_stmt
))
3836 if (cast_stmt
&& gimple_assign_cast_p (cur_use_stmt
))
3838 tree cast_lhs
= gimple_assign_lhs (cur_use_stmt
);
3839 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs
))
3840 && TYPE_UNSIGNED (TREE_TYPE (cast_lhs
))
3841 && (TYPE_PRECISION (TREE_TYPE (cast_lhs
))
3842 == TYPE_PRECISION (TREE_TYPE (divlhs
)))
3843 && single_imm_use (cast_lhs
, &use
, &cur_use_stmt
))
3852 if (gimple_code (cur_use_stmt
) == GIMPLE_COND
)
3854 ccode
= gimple_cond_code (cur_use_stmt
);
3855 crhs1
= gimple_cond_lhs (cur_use_stmt
);
3856 crhs2
= gimple_cond_rhs (cur_use_stmt
);
3858 else if (is_gimple_assign (cur_use_stmt
))
3860 if (gimple_assign_rhs_class (cur_use_stmt
) == GIMPLE_BINARY_RHS
)
3862 ccode
= gimple_assign_rhs_code (cur_use_stmt
);
3863 crhs1
= gimple_assign_rhs1 (cur_use_stmt
);
3864 crhs2
= gimple_assign_rhs2 (cur_use_stmt
);
3866 else if (gimple_assign_rhs_code (cur_use_stmt
) == COND_EXPR
)
3868 tree cond
= gimple_assign_rhs1 (cur_use_stmt
);
3869 if (COMPARISON_CLASS_P (cond
))
3871 ccode
= TREE_CODE (cond
);
3872 crhs1
= TREE_OPERAND (cond
, 0);
3873 crhs2
= TREE_OPERAND (cond
, 1);
3885 && ccode
== RSHIFT_EXPR
3887 && TREE_CODE (crhs2
) == INTEGER_CST
3888 && wi::to_widest (crhs2
) == TYPE_PRECISION (TREE_TYPE (maxval
)))
3890 tree shiftlhs
= gimple_assign_lhs (use_stmt
);
3894 if (!single_imm_use (shiftlhs
, &use
, &cur_use_stmt
))
3896 if (gimple_code (cur_use_stmt
) == GIMPLE_COND
)
3898 ccode
= gimple_cond_code (cur_use_stmt
);
3899 crhs1
= gimple_cond_lhs (cur_use_stmt
);
3900 crhs2
= gimple_cond_rhs (cur_use_stmt
);
3902 else if (is_gimple_assign (cur_use_stmt
))
3904 if (gimple_assign_rhs_class (cur_use_stmt
) == GIMPLE_BINARY_RHS
)
3906 ccode
= gimple_assign_rhs_code (cur_use_stmt
);
3907 crhs1
= gimple_assign_rhs1 (cur_use_stmt
);
3908 crhs2
= gimple_assign_rhs2 (cur_use_stmt
);
3910 else if (gimple_assign_rhs_code (cur_use_stmt
) == COND_EXPR
)
3912 tree cond
= gimple_assign_rhs1 (cur_use_stmt
);
3913 if (COMPARISON_CLASS_P (cond
))
3915 ccode
= TREE_CODE (cond
);
3916 crhs1
= TREE_OPERAND (cond
, 0);
3917 crhs2
= TREE_OPERAND (cond
, 1);
3924 enum tree_code sc
= gimple_assign_rhs_code (cur_use_stmt
);
3925 tree castlhs
= gimple_assign_lhs (cur_use_stmt
);
3926 if (!CONVERT_EXPR_CODE_P (sc
)
3928 || !INTEGRAL_TYPE_P (TREE_TYPE (castlhs
))
3929 || (TYPE_PRECISION (TREE_TYPE (castlhs
))
3930 > TYPE_PRECISION (TREE_TYPE (maxval
))))
3937 if ((ccode
!= EQ_EXPR
&& ccode
!= NE_EXPR
)
3938 || crhs1
!= shiftlhs
3939 || !integer_zerop (crhs2
))
3944 if (TREE_CODE_CLASS (ccode
) != tcc_comparison
)
3953 /* r = a + b; r > maxval or r <= maxval */
3955 && TREE_CODE (crhs2
) == INTEGER_CST
3956 && tree_int_cst_equal (crhs2
, maxval
))
3957 return ccode
== GT_EXPR
? 1 : -1;
3960 /* r = a - b; r > a or r <= a
3961 r = a + b; a > r or a <= r or b > r or b <= r. */
3962 if ((code
== MINUS_EXPR
&& crhs1
== lhs
&& crhs2
== rhs1
)
3963 || (code
== PLUS_EXPR
&& (crhs1
== rhs1
|| crhs1
== rhs2
)
3965 return ccode
== GT_EXPR
? 1 : -1;
3966 /* r = ~a; b > r or b <= r. */
3967 if (code
== BIT_NOT_EXPR
&& crhs2
== lhs
)
3971 return ccode
== GT_EXPR
? 1 : -1;
3978 /* r = a - b; a < r or a >= r
3979 r = a + b; r < a or r >= a or r < b or r >= b. */
3980 if ((code
== MINUS_EXPR
&& crhs1
== rhs1
&& crhs2
== lhs
)
3981 || (code
== PLUS_EXPR
&& crhs1
== lhs
3982 && (crhs2
== rhs1
|| crhs2
== rhs2
)))
3983 return ccode
== LT_EXPR
? 1 : -1;
3984 /* r = ~a; r < b or r >= b. */
3985 if (code
== BIT_NOT_EXPR
&& crhs1
== lhs
)
3989 return ccode
== LT_EXPR
? 1 : -1;
3994 /* r = a * b; _1 = r / a; _1 == b
3995 r = a * b; _1 = r / b; _1 == a
3996 r = a * b; _1 = r / a; _1 != b
3997 r = a * b; _1 = r / b; _1 != a. */
3998 if (code
== MULT_EXPR
)
4002 if ((crhs1
== divlhs
&& arith_cast_equal_p (crhs2
, multop
))
4003 || (crhs2
== divlhs
&& arith_cast_equal_p (crhs1
, multop
)))
4005 use_stmt
= cur_use_stmt
;
4006 return ccode
== NE_EXPR
? 1 : -1;
4009 else if ((crhs1
== divlhs
&& operand_equal_p (crhs2
, multop
, 0))
4010 || (crhs2
== divlhs
&& crhs1
== multop
))
4012 use_stmt
= cur_use_stmt
;
4013 return ccode
== NE_EXPR
? 1 : -1;
4023 extern bool gimple_unsigned_integer_sat_add (tree
, tree
*, tree (*)(tree
));
4024 extern bool gimple_unsigned_integer_sat_sub (tree
, tree
*, tree (*)(tree
));
4025 extern bool gimple_unsigned_integer_sat_trunc (tree
, tree
*, tree (*)(tree
));
4027 extern bool gimple_signed_integer_sat_add (tree
, tree
*, tree (*)(tree
));
4028 extern bool gimple_signed_integer_sat_sub (tree
, tree
*, tree (*)(tree
));
4029 extern bool gimple_signed_integer_sat_trunc (tree
, tree
*, tree (*)(tree
));
4032 build_saturation_binary_arith_call_and_replace (gimple_stmt_iterator
*gsi
,
4033 internal_fn fn
, tree lhs
,
4034 tree op_0
, tree op_1
)
4036 if (direct_internal_fn_supported_p (fn
, TREE_TYPE (lhs
), OPTIMIZE_FOR_BOTH
))
4038 gcall
*call
= gimple_build_call_internal (fn
, 2, op_0
, op_1
);
4039 gimple_call_set_lhs (call
, lhs
);
4040 gsi_replace (gsi
, call
, /* update_eh_info */ true);
4045 build_saturation_binary_arith_call_and_insert (gimple_stmt_iterator
*gsi
,
4046 internal_fn fn
, tree lhs
,
4047 tree op_0
, tree op_1
)
4049 if (!direct_internal_fn_supported_p (fn
, TREE_TYPE (op_0
), OPTIMIZE_FOR_BOTH
))
4052 gcall
*call
= gimple_build_call_internal (fn
, 2, op_0
, op_1
);
4053 gimple_call_set_lhs (call
, lhs
);
4054 gsi_insert_before (gsi
, call
, GSI_SAME_STMT
);
4060 * Try to match saturation unsigned add with assign.
4063 * _9 = (long unsigned int) _8;
4067 * _12 = .SAT_ADD (_4, _6); */
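/* Illustrative sketch, not part of the pass (helper name made up): one
   branchless source form of the kind gimple_unsigned_integer_sat_add is
   meant to recognize; the whole expression collapses to one .SAT_ADD call
   when the target supports it.  */

static unsigned int
usadd_sketch (unsigned int x, unsigned int y)
{
  unsigned int sum = x + y;
  return sum | -(unsigned int) (sum < x);   /* UINT_MAX on overflow  */
}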
4070 match_unsigned_saturation_add (gimple_stmt_iterator
*gsi
, gassign
*stmt
)
4073 tree lhs
= gimple_assign_lhs (stmt
);
4075 if (gimple_unsigned_integer_sat_add (lhs
, ops
, NULL
))
4076 build_saturation_binary_arith_call_and_replace (gsi
, IFN_SAT_ADD
, lhs
,
4081 * Try to match saturation add with PHI.
4082 * For unsigned integer:
4084 * _1 = x_3(D) + y_4(D);
4086 * goto <bb 3>; [INV]
4088 * goto <bb 4>; [INV]
4093 * # _2 = PHI <255(2), _1(3)>
4095 * <bb 4> [local count: 1073741824]:
4096 * _2 = .SAT_ADD (x_4(D), y_5(D));
4098 * For signed integer:
4099 * x.0_1 = (long unsigned int) x_7(D);
4100 * y.1_2 = (long unsigned int) y_8(D);
4101 * _3 = x.0_1 + y.1_2;
4102 * sum_9 = (int64_t) _3;
4103 * _4 = x_7(D) ^ y_8(D);
4104 * _5 = x_7(D) ^ sum_9;
4108 * goto <bb 3>; [41.00%]
4110 * goto <bb 4>; [59.00%]
4112 * _12 = (long int) _11;
4114 * _14 = _13 ^ 9223372036854775807;
4115 * # _6 = PHI <_14(3), sum_9(2)>
4117 * _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
4120 match_saturation_add (gimple_stmt_iterator
*gsi
, gphi
*phi
)
4122 if (gimple_phi_num_args (phi
) != 2)
4126 tree phi_result
= gimple_phi_result (phi
);
4128 if (!gimple_unsigned_integer_sat_add (phi_result
, ops
, NULL
)
4129 && !gimple_signed_integer_sat_add (phi_result
, ops
, NULL
))
4132 return build_saturation_binary_arith_call_and_insert (gsi
, IFN_SAT_ADD
,
4138 * Try to match saturation unsigned sub.
4143 * _6 = .SAT_SUB (_4, _5); */
4146 match_unsigned_saturation_sub (gimple_stmt_iterator
*gsi
, gassign
*stmt
)
4149 tree lhs
= gimple_assign_lhs (stmt
);
4151 if (gimple_unsigned_integer_sat_sub (lhs
, ops
, NULL
))
4152 build_saturation_binary_arith_call_and_replace (gsi
, IFN_SAT_SUB
, lhs
,
4157 * Try to match saturation unsigned sub.
4158 * <bb 2> [local count: 1073741824]:
4159 * if (x_2(D) > y_3(D))
4160 * goto <bb 3>; [50.00%]
4162 * goto <bb 4>; [50.00%]
4164 * <bb 3> [local count: 536870912]:
4165 * _4 = x_2(D) - y_3(D);
4167 * <bb 4> [local count: 1073741824]:
4168 * # _1 = PHI <0(2), _4(3)>
4170 * <bb 4> [local count: 1073741824]:
4171 * _1 = .SAT_SUB (x_2(D), y_3(D)); */
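/* Illustrative sketch, not part of the pass (helper name made up): the source
   form behind the IL above; the branch and the PHI collapse to a single
   .SAT_SUB call when the target supports it.  */

static unsigned int
ussub_sketch (unsigned int x, unsigned int y)
{
  return x > y ? x - y : 0;   /* becomes .SAT_SUB (x, y)  */
}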
4173 match_saturation_sub (gimple_stmt_iterator
*gsi
, gphi
*phi
)
4175 if (gimple_phi_num_args (phi
) != 2)
4179 tree phi_result
= gimple_phi_result (phi
);
4181 if (!gimple_unsigned_integer_sat_sub (phi_result
, ops
, NULL
)
4182 && !gimple_signed_integer_sat_sub (phi_result
, ops
, NULL
))
4185 return build_saturation_binary_arith_call_and_insert (gsi
, IFN_SAT_SUB
,
 * Try to match saturation unsigned truncation.
4194 * overflow_5 = x_4(D) > 255;
4195 * _1 = (unsigned char) x_4(D);
4196 * _2 = (unsigned char) overflow_5;
4200 * _6 = .SAT_TRUNC (x_4(D));
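/* Illustrative sketch, not part of the pass (helper name made up): the
   pattern above is a branchless form of x > 255 ? 255 : (unsigned char) x,
   which is replaced by a single .SAT_TRUNC call.  */

static unsigned char
ustrunc_sketch (unsigned int x)
{
  unsigned char trunc = (unsigned char) x;
  unsigned char mask = -(unsigned char) (x > 255);   /* 0xff iff x > 255  */
  return trunc | mask;                               /* .SAT_TRUNC (x)  */
}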
4203 match_unsigned_saturation_trunc (gimple_stmt_iterator
*gsi
, gassign
*stmt
)
4206 tree lhs
= gimple_assign_lhs (stmt
);
4207 tree type
= TREE_TYPE (lhs
);
4209 if (gimple_unsigned_integer_sat_trunc (lhs
, ops
, NULL
)
4210 && direct_internal_fn_supported_p (IFN_SAT_TRUNC
,
4211 tree_pair (type
, TREE_TYPE (ops
[0])),
4214 gcall
*call
= gimple_build_call_internal (IFN_SAT_TRUNC
, 1, ops
[0]);
4215 gimple_call_set_lhs (call
, lhs
);
4216 gsi_replace (gsi
, call
, /* update_eh_info */ true);
/*
 * Try to match saturation truncate.
 *   x.0_1 = (unsigned long) x_4(D);
 *   _2 = x.0_1 + 2147483648;
 *   if (_2 > 4294967295)
 *     goto <bb 4>; [50.00%]
 *     goto <bb 3>; [50.00%]
 *
 *   ;;   basic block 3, loop depth 0
 *   trunc_5 = (int32_t) x_4(D);
 *   goto <bb 5>; [100.00%]
 *
 *   ;;   basic block 4, loop depth 0
 *   _10 = _9 ^ 2147483647;
 *
 *   ;;   basic block 5, loop depth 0
 *   # _3 = PHI <trunc_5(3), _10(4)>
 *   =>
 *   _6 = .SAT_TRUNC (x_4(D));  */
static bool
match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi)
{
  if (gimple_phi_num_args (phi) != 2)
    return false;

  tree ops[1];
  tree phi_result = gimple_phi_result (phi);
  tree type = TREE_TYPE (phi_result);

  if (!gimple_unsigned_integer_sat_trunc (phi_result, ops, NULL)
      && !gimple_signed_integer_sat_trunc (phi_result, ops, NULL))
    return false;

  if (!direct_internal_fn_supported_p (IFN_SAT_TRUNC,
				       tree_pair (type, TREE_TYPE (ops[0])),
				       OPTIMIZE_FOR_BOTH))
    return false;

  gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
  gimple_call_set_lhs (call, phi_result);
  gsi_insert_before (gsi, call, GSI_SAME_STMT);

  return true;
}
/* Recognize for unsigned x
   x = y - z;
   if (x > y)
   where there are other uses of x and replace it with
   _7 = .SUB_OVERFLOW (y, z);
   x = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)
   and similarly for addition.

   Also recognize:
   yc = (type) y;
   zc = (type) z;
   x = yc + zc;
   if (x > max)
   where y and z have unsigned types with maximum max
   and there are other uses of x and all of those cast x
   back to that unsigned type and again replace it with
   _7 = .ADD_OVERFLOW (y, z);
   _9 = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)
   and replace (utype) x with _9.
   Or with x >> popcount (max) instead of x > max.

   Also recognize:
   x = ~z;
   if (y > x)
   and replace it with
   _7 = .ADD_OVERFLOW (y, z);
   _8 = IMAGPART_EXPR <_7>;
   if (_8)

   And also recognize:
   z = x * y;
   if (x != 0)
     goto <bb 3>; [50.00%]
     goto <bb 4>; [50.00%]

   <bb 3> [local count: 536870913]:
   _2 = z / x;
   _9 = _2 != y;
   _10 = (int) _9;

   <bb 4> [local count: 1073741824]:
   # iftmp.0_3 = PHI <_10(3), 0(2)>
   and replace it with
   _7 = .MUL_OVERFLOW (x, y);
   z = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   _9 = _8 != 0;
   iftmp.0_3 = (int) _9;  */
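/* Illustrative example, not taken from the GCC sources: the kind of C source
   that produces the addition form of the checks described above
   (hypothetical function):

     unsigned add_check (unsigned y, unsigned z, int *overflow)
     {
       unsigned x = y + z;
       *overflow = x < y;
       return x;
     }

   Both the sum and the comparison then come from one .ADD_OVERFLOW call.  */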
match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
		      enum tree_code code, bool *cfg_changed)
  tree lhs = gimple_assign_lhs (stmt);
  tree type = TREE_TYPE (lhs);
  use_operand_p use_p;
  imm_use_iterator iter;
  bool use_seen = false;
  bool ovf_use_seen = false;
  gimple *add_stmt = NULL;
  bool add_first = false;
  gimple *cond_stmt = NULL;
  gimple *cast_stmt = NULL;
  tree cast_lhs = NULL_TREE;

  gcc_checking_assert (code == PLUS_EXPR
		       || code == MINUS_EXPR
		       || code == MULT_EXPR
		       || code == BIT_NOT_EXPR);
  if (!INTEGRAL_TYPE_P (type)
      || !TYPE_UNSIGNED (type)
      || has_zero_uses (lhs)
      || (code != PLUS_EXPR
	  && code != MULT_EXPR
	  && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
			    TYPE_MODE (type)) == CODE_FOR_nothing))

  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
    use_stmt = USE_STMT (use_p);
    if (is_gimple_debug (use_stmt))
    tree other = NULL_TREE;
    if (arith_overflow_check_p (stmt, NULL, use_stmt, NULL_TREE, &other))
      if (code == BIT_NOT_EXPR)
	if (TREE_CODE (other) != SSA_NAME)
	cond_stmt = use_stmt;
      ovf_use_seen = true;
    if (code == MULT_EXPR
	&& cast_stmt == NULL
	&& gimple_assign_cast_p (use_stmt))
      cast_lhs = gimple_assign_lhs (use_stmt);
      if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs))
	  && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs))
	  && (TYPE_PRECISION (TREE_TYPE (cast_lhs))
	      == TYPE_PRECISION (TREE_TYPE (lhs))))
	cast_stmt = use_stmt;
	cast_lhs = NULL_TREE;

  if (ovf_use_seen && use_seen)
      && code == MULT_EXPR
    if (TREE_CODE (rhs1) != SSA_NAME
	|| (TREE_CODE (rhs2) != SSA_NAME && TREE_CODE (rhs2) != INTEGER_CST))
    FOR_EACH_IMM_USE_FAST (use_p, iter, cast_lhs)
      use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
      if (arith_overflow_check_p (stmt, cast_stmt, use_stmt,
	ovf_use_seen = true;
	cast_lhs = NULL_TREE;

  tree maxval = NULL_TREE;
      || (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
      || (code == PLUS_EXPR
	  && optab_handler (uaddv4_optab,
			    TYPE_MODE (type)) == CODE_FOR_nothing)
      || (code == MULT_EXPR
	  && optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
			    TYPE_MODE (type)) == CODE_FOR_nothing
	      || !can_mult_highpart_p (TYPE_MODE (type), true))))
    if (code != PLUS_EXPR)
    if (TREE_CODE (rhs1) != SSA_NAME
	|| !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1)))
    rhs1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1));
    tree type1 = TREE_TYPE (rhs1);
    if (!INTEGRAL_TYPE_P (type1)
	|| !TYPE_UNSIGNED (type1)
	|| TYPE_PRECISION (type1) >= TYPE_PRECISION (type)
	|| (TYPE_PRECISION (type1)
	    != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1))))
    if (TREE_CODE (rhs2) == INTEGER_CST)
      if (wi::ne_p (wi::rshift (wi::to_wide (rhs2),
				TYPE_PRECISION (type1),
      rhs2 = fold_convert (type1, rhs2);
    if (TREE_CODE (rhs2) != SSA_NAME
	|| !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2)))
    rhs2 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2));
    tree type2 = TREE_TYPE (rhs2);
    if (!INTEGRAL_TYPE_P (type2)
	|| !TYPE_UNSIGNED (type2)
	|| TYPE_PRECISION (type2) >= TYPE_PRECISION (type)
	|| (TYPE_PRECISION (type2)
	    != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2))))
    if (TYPE_PRECISION (type1) >= TYPE_PRECISION (TREE_TYPE (rhs2)))
      type = TREE_TYPE (rhs2);
    if (TREE_CODE (type) != INTEGER_TYPE
	|| optab_handler (uaddv4_optab,
			  TYPE_MODE (type)) == CODE_FOR_nothing)
    maxval = wide_int_to_tree (type, wi::max_value (TYPE_PRECISION (type),
    ovf_use_seen = false;
    basic_block use_bb = NULL;
    FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
      use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
      if (arith_overflow_check_p (stmt, NULL, use_stmt, maxval, NULL))
	ovf_use_seen = true;
	use_bb = gimple_bb (use_stmt);
      if (!gimple_assign_cast_p (use_stmt)
	  || gimple_assign_rhs_code (use_stmt) == VIEW_CONVERT_EXPR)
      tree use_lhs = gimple_assign_lhs (use_stmt);
      if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs))
	  || (TYPE_PRECISION (TREE_TYPE (use_lhs))
	      > TYPE_PRECISION (type)))
    if (!useless_type_conversion_p (type, TREE_TYPE (rhs1)))
      tree new_rhs1 = make_ssa_name (type);
      gimple *g = gimple_build_assign (new_rhs1, NOP_EXPR, rhs1);
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
    else if (!useless_type_conversion_p (type, TREE_TYPE (rhs2)))
      tree new_rhs2 = make_ssa_name (type);
      gimple *g = gimple_build_assign (new_rhs2, NOP_EXPR, rhs2);
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
    /* If there are no uses of the wider addition, check if
       forwprop has not created a narrower addition.
       Require it to be in the same bb as the overflow check.  */
    FOR_EACH_IMM_USE_FAST (use_p, iter, rhs1)
      use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
      if (use_stmt == stmt)
      if (!is_gimple_assign (use_stmt)
	  || gimple_bb (use_stmt) != use_bb
	  || gimple_assign_rhs_code (use_stmt) != PLUS_EXPR)
      if (gimple_assign_rhs1 (use_stmt) == rhs1)
	if (!operand_equal_p (gimple_assign_rhs2 (use_stmt),
      else if (gimple_assign_rhs2 (use_stmt) == rhs1)
	if (gimple_assign_rhs1 (use_stmt) != rhs2)
      add_stmt = use_stmt;
    if (add_stmt == NULL)
    /* If stmt and add_stmt are in the same bb, we need to find out
       which one is earlier.  If they are in different bbs, we've
       checked add_stmt is in the same bb as one of the uses of the
       stmt lhs, so stmt needs to dominate add_stmt too.  */
    if (gimple_bb (stmt) == gimple_bb (add_stmt))
      gimple_stmt_iterator gsif = *gsi;
      gimple_stmt_iterator gsib = *gsi;
      /* Search both forward and backward from stmt and have a small
      for (i = 0; i < 128; i++)
	if (!gsi_end_p (gsib))
	  gsi_prev_nondebug (&gsib);
	  if (gsi_stmt (gsib) == add_stmt)
	else if (gsi_end_p (gsif))
	if (!gsi_end_p (gsif))
	  gsi_next_nondebug (&gsif);
	  if (gsi_stmt (gsif) == add_stmt)
      *gsi = gsi_for_stmt (add_stmt);

  if (code == BIT_NOT_EXPR)
    *gsi = gsi_for_stmt (cond_stmt);

  auto_vec<gimple *, 8> mul_stmts;
  if (code == MULT_EXPR && cast_stmt)
    type = TREE_TYPE (cast_lhs);
    gimple *g = SSA_NAME_DEF_STMT (rhs1);
    if (gimple_assign_cast_p (g)
	&& useless_type_conversion_p (type,
				      TREE_TYPE (gimple_assign_rhs1 (g)))
	&& !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
      rhs1 = gimple_assign_rhs1 (g);
      g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs1);
      gsi_insert_before (gsi, g, GSI_SAME_STMT);
      rhs1 = gimple_assign_lhs (g);
      mul_stmts.quick_push (g);
    if (TREE_CODE (rhs2) == INTEGER_CST)
      rhs2 = fold_convert (type, rhs2);
      g = SSA_NAME_DEF_STMT (rhs2);
      if (gimple_assign_cast_p (g)
	  && useless_type_conversion_p (type,
					TREE_TYPE (gimple_assign_rhs1 (g)))
	  && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
	rhs2 = gimple_assign_rhs1 (g);
	g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, rhs2);
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
	rhs2 = gimple_assign_lhs (g);
	mul_stmts.quick_push (g);

  tree ctype = build_complex_type (type);
  gcall *g = gimple_build_call_internal (code == MULT_EXPR
					 : code != MINUS_EXPR
					 ? IFN_ADD_OVERFLOW : IFN_SUB_OVERFLOW,
  tree ctmp = make_ssa_name (ctype);
  gimple_call_set_lhs (g, ctmp);
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  tree new_lhs = (maxval || cast_stmt) ? make_ssa_name (type) : lhs;
  if (code != BIT_NOT_EXPR)
    g2 = gimple_build_assign (new_lhs, REALPART_EXPR,
			      build1 (REALPART_EXPR, type, ctmp));
    if (maxval || cast_stmt)
      gsi_insert_before (gsi, g2, GSI_SAME_STMT);
      *gsi = gsi_for_stmt (stmt);
    gsi_replace (gsi, g2, true);
    if (code == MULT_EXPR)
      mul_stmts.quick_push (g);
      mul_stmts.quick_push (g2);
      g2 = gimple_build_assign (lhs, NOP_EXPR, new_lhs);
      gsi_replace (gsi, g2, true);
      mul_stmts.quick_push (g2);

  tree ovf = make_ssa_name (type);
  g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
			    build1 (IMAGPART_EXPR, type, ctmp));
  if (code != BIT_NOT_EXPR)
    gsi_insert_after (gsi, g2, GSI_NEW_STMT);
    gsi_insert_before (gsi, g2, GSI_SAME_STMT);
  if (code == MULT_EXPR)
    mul_stmts.quick_push (g2);

  FOR_EACH_IMM_USE_STMT (use_stmt, iter, cast_lhs ? cast_lhs : lhs)
    if (is_gimple_debug (use_stmt))
    gimple *orig_use_stmt = use_stmt;
    int ovf_use = arith_overflow_check_p (stmt, cast_stmt, use_stmt,
      gcc_assert (code != BIT_NOT_EXPR);
      tree use_lhs = gimple_assign_lhs (use_stmt);
      gimple_assign_set_rhs1 (use_stmt, new_lhs);
      if (useless_type_conversion_p (TREE_TYPE (use_lhs),
				     TREE_TYPE (new_lhs)))
	gimple_assign_set_rhs_code (use_stmt, SSA_NAME);
      update_stmt (use_stmt);
    if (gimple_code (use_stmt) == GIMPLE_COND)
      gcond *cond_stmt = as_a <gcond *> (use_stmt);
      gimple_cond_set_lhs (cond_stmt, ovf);
      gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
      gimple_cond_set_code (cond_stmt, ovf_use == 1 ? NE_EXPR : EQ_EXPR);
      gcc_checking_assert (is_gimple_assign (use_stmt));
      if (gimple_assign_rhs_class (use_stmt) == GIMPLE_BINARY_RHS)
	if (gimple_assign_rhs_code (use_stmt) == RSHIFT_EXPR)
	  g2 = gimple_build_assign (make_ssa_name (boolean_type_node),
				    ovf_use == 1 ? NE_EXPR : EQ_EXPR,
				    ovf, build_int_cst (type, 0));
	  gimple_stmt_iterator gsiu = gsi_for_stmt (use_stmt);
	  gsi_insert_before (&gsiu, g2, GSI_SAME_STMT);
	  gimple_assign_set_rhs_with_ops (&gsiu, NOP_EXPR,
					  gimple_assign_lhs (g2));
	  update_stmt (use_stmt);
	  single_imm_use (gimple_assign_lhs (use_stmt), &use,
	  if (gimple_code (use_stmt) == GIMPLE_COND)
	    gcond *cond_stmt = as_a <gcond *> (use_stmt);
	    gimple_cond_set_lhs (cond_stmt, ovf);
	    gimple_cond_set_rhs (cond_stmt, build_int_cst (type, 0));
	    gcc_checking_assert (is_gimple_assign (use_stmt));
	    if (gimple_assign_rhs_class (use_stmt)
		== GIMPLE_BINARY_RHS)
	      gimple_assign_set_rhs1 (use_stmt, ovf);
	      gimple_assign_set_rhs2 (use_stmt,
				      build_int_cst (type, 0));
	    else if (gimple_assign_cast_p (use_stmt))
	      gimple_assign_set_rhs1 (use_stmt, ovf);
	      tree_code sc = gimple_assign_rhs_code (use_stmt);
	      gcc_checking_assert (sc == COND_EXPR);
	      tree cond = gimple_assign_rhs1 (use_stmt);
	      cond = build2 (TREE_CODE (cond),
			     boolean_type_node, ovf,
			     build_int_cst (type, 0));
	      gimple_assign_set_rhs1 (use_stmt, cond);
	  update_stmt (use_stmt);
	  gsi_remove (&gsiu, true);
	  gsiu = gsi_for_stmt (g2);
	  gsi_remove (&gsiu, true);
	  gimple_assign_set_rhs1 (use_stmt, ovf);
	  gimple_assign_set_rhs2 (use_stmt, build_int_cst (type, 0));
	  gimple_assign_set_rhs_code (use_stmt,
				      ? NE_EXPR : EQ_EXPR);
	gcc_checking_assert (gimple_assign_rhs_code (use_stmt)
	tree cond = build2 (ovf_use == 1 ? NE_EXPR : EQ_EXPR,
			    boolean_type_node, ovf,
			    build_int_cst (type, 0));
	gimple_assign_set_rhs1 (use_stmt, cond);
    update_stmt (use_stmt);
    if (code == MULT_EXPR && use_stmt != orig_use_stmt)
      gimple_stmt_iterator gsi2 = gsi_for_stmt (orig_use_stmt);
      maybe_optimize_guarding_check (mul_stmts, use_stmt, orig_use_stmt,
      if (single_imm_use (gimple_assign_lhs (orig_use_stmt), &use,
	  && gimple_assign_cast_p (cast_stmt))
	gimple_stmt_iterator gsi3 = gsi_for_stmt (cast_stmt);
	gsi_remove (&gsi3, true);
	release_ssa_name (gimple_assign_lhs (cast_stmt));
      gsi_remove (&gsi2, true);
      release_ssa_name (gimple_assign_lhs (orig_use_stmt));

    gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt);
    gsi_remove (&gsi2, true);
    gimple *g = gimple_build_assign (gimple_assign_lhs (add_stmt),
    gsi2 = gsi_for_stmt (add_stmt);
    gsi_replace (&gsi2, g, true);
  else if (code == BIT_NOT_EXPR)
    *gsi = gsi_for_stmt (stmt);
    gsi_remove (gsi, true);
    release_ssa_name (lhs);
/* Helper of match_uaddc_usubc.  Look through an integral cast
   which should preserve [0, 1] range value (unless source has
   1-bit signed type) and the cast has single use.  */

static gimple *
uaddc_cast (gimple *g)
{
  if (!gimple_assign_cast_p (g))
    return g;
  tree op = gimple_assign_rhs1 (g);
  if (TREE_CODE (op) == SSA_NAME
      && INTEGRAL_TYPE_P (TREE_TYPE (op))
      && (TYPE_PRECISION (TREE_TYPE (op)) > 1
	  || TYPE_UNSIGNED (TREE_TYPE (op)))
      && has_single_use (gimple_assign_lhs (g)))
    return SSA_NAME_DEF_STMT (op);
  return g;
}
/* Helper of match_uaddc_usubc.  Look through a NE_EXPR
   comparison with 0 which also preserves [0, 1] value range.  */

static gimple *
uaddc_ne0 (gimple *g)
{
  if (is_gimple_assign (g)
      && gimple_assign_rhs_code (g) == NE_EXPR
      && integer_zerop (gimple_assign_rhs2 (g))
      && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (g)))
    return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
  return g;
}
/* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
   operand.  */

static bool
uaddc_is_cplxpart (gimple *g, tree_code part)
{
  return (is_gimple_assign (g)
	  && gimple_assign_rhs_code (g) == part
	  && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
}
/* Try to match e.g.
   _29 = .ADD_OVERFLOW (_3, _4);
   _30 = REALPART_EXPR <_29>;
   _31 = IMAGPART_EXPR <_29>;
   _32 = .ADD_OVERFLOW (_30, _38);
   _33 = REALPART_EXPR <_32>;
   _34 = IMAGPART_EXPR <_32>;
   into
   _36 = .UADDC (_3, _4, _38);
   _33 = REALPART_EXPR <_36>;
   _35 = IMAGPART_EXPR <_36>;
   or
   _22 = .SUB_OVERFLOW (_6, _5);
   _23 = REALPART_EXPR <_22>;
   _24 = IMAGPART_EXPR <_22>;
   _25 = .SUB_OVERFLOW (_23, _37);
   _26 = REALPART_EXPR <_25>;
   _27 = IMAGPART_EXPR <_25>;
   into
   _29 = .USUBC (_6, _5, _37);
   _26 = REALPART_EXPR <_29>;
   _28 = IMAGPART_EXPR <_29>;
   provided _38 or _37 above have [0, 1] range
   and _3, _4 and _30 or _6, _5 and _23 are unsigned
   integral types with the same precision.  Whether + or | or ^ is
   used on the IMAGPART_EXPR results doesn't matter, with one of
   added or subtracted operands in [0, 1] range at most one
   .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow.  */
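/* Illustrative example, not taken from the GCC sources: a hypothetical
   two-limb addition of the kind whose carry-propagating .ADD_OVERFLOW
   chains this matcher looks for, so they can be fused into .UADDC on
   targets with an add-with-carry pattern:

     void add2 (unsigned long *rl, unsigned long *rh,
		unsigned long al, unsigned long ah,
		unsigned long bl, unsigned long bh)
     {
       unsigned long lo = al + bl;
       unsigned long carry = lo < al;
       *rl = lo;
       *rh = ah + bh + carry;
     }

   The carry value in [0, 1] is exactly the IMAGPART_EXPR operand discussed
   above.  */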
match_uaddc_usubc (gimple_stmt_iterator *gsi, gimple *stmt, tree_code code)
  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  tree type = TREE_TYPE (rhs[0]);
  if (!INTEGRAL_TYPE_P (type) || !TYPE_UNSIGNED (type))
  auto_vec<gimple *, 2> temp_stmts;
  if (code != BIT_IOR_EXPR && code != BIT_XOR_EXPR)
    /* If overflow flag is ignored on the MSB limb, we can end up with
       the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
       or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
       thereof.  Handle those like the ovf = ovf1 + ovf2; case to recognize
       the limb below the MSB, but also create another .UADDC/.USUBC call

       First look through assignments with the same rhs code as CODE,
       with the exception that subtraction of a constant is canonicalized
       into addition of its negation.  rhs[0] will be minuend for
       subtractions and one of addends for addition, all other assigned
       rhs[i] operands will be subtrahends or other addends.  */
    while (TREE_CODE (rhs[0]) == SSA_NAME && !rhs[3])
      gimple *g = SSA_NAME_DEF_STMT (rhs[0]);
      if (has_single_use (rhs[0])
	  && is_gimple_assign (g)
	  && (gimple_assign_rhs_code (g) == code
	      || (code == MINUS_EXPR
		  && gimple_assign_rhs_code (g) == PLUS_EXPR
		  && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST)))
	tree r2 = gimple_assign_rhs2 (g);
	if (gimple_assign_rhs_code (g) != code)
	  r2 = const_unop (NEGATE_EXPR, TREE_TYPE (r2), r2);
	rhs[0] = gimple_assign_rhs1 (g);
	tree &r = rhs[2] ? rhs[3] : rhs[2];
	temp_stmts.quick_push (g);
    for (int i = 1; i <= 2; ++i)
      while (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME && !rhs[3])
	gimple *g = SSA_NAME_DEF_STMT (rhs[i]);
	if (has_single_use (rhs[i])
	    && is_gimple_assign (g)
	    && gimple_assign_rhs_code (g) == PLUS_EXPR)
	  rhs[i] = gimple_assign_rhs1 (g);
	    rhs[3] = gimple_assign_rhs2 (g);
	    rhs[2] = gimple_assign_rhs2 (g);
	  temp_stmts.quick_push (g);
  /* If there are just 3 addends or one minuend and two subtrahends,
     check for UADDC or USUBC being pattern recognized earlier.
     Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
     got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
  if (rhs[2] && !rhs[3])
    for (int i = (code == MINUS_EXPR ? 1 : 0); i < 3; ++i)
      if (TREE_CODE (rhs[i]) == SSA_NAME)
	gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
	im = uaddc_ne0 (im);
	if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
	  /* We found one of the 3 addends or 2 subtrahends to be
	     __imag__ of something, verify it is .UADDC/.USUBC.  */
	  tree rhs1 = gimple_assign_rhs1 (im);
	  gimple *ovf = SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1, 0));
	  tree ovf_lhs = NULL_TREE;
	  tree ovf_arg1 = NULL_TREE, ovf_arg2 = NULL_TREE;
	  if (gimple_call_internal_p (ovf, code == PLUS_EXPR
					   : IFN_SUB_OVERFLOW))
	    /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
	       This is for the case of 2 chained .UADDC/.USUBC,
	       where the first one uses 0 carry-in and the second
	       one ignores the carry-out.
	       _16 = .ADD_OVERFLOW (_1, _2);
	       _17 = REALPART_EXPR <_16>;
	       _18 = IMAGPART_EXPR <_16>;
	       where the first 3 statements come from the lower
	       limb addition and the last 2 from the higher limb
	       which ignores carry-out.  */
	    ovf_lhs = gimple_call_lhs (ovf);
	    tree ovf_lhs_type = TREE_TYPE (TREE_TYPE (ovf_lhs));
	    ovf_arg1 = gimple_call_arg (ovf, 0);
	    ovf_arg2 = gimple_call_arg (ovf, 1);
	    /* In that case we need to punt if the types don't
	    if (!types_compatible_p (type, ovf_lhs_type)
		|| !types_compatible_p (type, TREE_TYPE (ovf_arg1))
		|| !types_compatible_p (type,
					TREE_TYPE (ovf_arg2)))
	      ovf_lhs = NULL_TREE;
	    for (int i = (code == PLUS_EXPR ? 1 : 0);
	      tree r = gimple_call_arg (ovf, i);
	      if (TREE_CODE (r) != SSA_NAME)
	      if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r),
		/* Punt if one of the args which isn't
		   subtracted isn't __real__; that could
		   then prevent better match later.
		   _3 = .ADD_OVERFLOW (_1, _2);
		   _4 = REALPART_EXPR <_3>;
		   _5 = IMAGPART_EXPR <_3>;
		   _7 = .ADD_OVERFLOW (_4, _6);
		   _8 = REALPART_EXPR <_7>;
		   _9 = IMAGPART_EXPR <_7>;
		   We want to match this when called on
		   the last stmt as a pair of .UADDC calls,
		   but without this check we could turn
		   that prematurely on _13 = _12 + _9;
		   stmt into .UADDC with 0 carry-in just
		   on the second .ADD_OVERFLOW call and
		   another replacing the _12 and _13
		ovf_lhs = NULL_TREE;
	    use_operand_p use_p;
	    imm_use_iterator iter;
	    tree re_lhs = NULL_TREE;
	    FOR_EACH_IMM_USE_FAST (use_p, iter, ovf_lhs)
	      gimple *use_stmt = USE_STMT (use_p);
	      if (is_gimple_debug (use_stmt))
	      if (!uaddc_is_cplxpart (use_stmt,
		ovf_lhs = NULL_TREE;
	      re_lhs = gimple_assign_lhs (use_stmt);
	    if (ovf_lhs && re_lhs)
	      FOR_EACH_IMM_USE_FAST (use_p, iter, re_lhs)
		gimple *use_stmt = USE_STMT (use_p);
		if (is_gimple_debug (use_stmt))
		  = gimple_call_internal_fn (ovf);
		/* Punt if the __real__ of lhs is used
		   in the same .*_OVERFLOW call.
		   _3 = .ADD_OVERFLOW (_1, _2);
		   _4 = REALPART_EXPR <_3>;
		   _5 = IMAGPART_EXPR <_3>;
		   _7 = .ADD_OVERFLOW (_4, _6);
		   _8 = REALPART_EXPR <_7>;
		   _9 = IMAGPART_EXPR <_7>;
		   We want to match this when called on
		   the last stmt as a pair of .UADDC calls,
		   but without this check we could turn
		   that prematurely on _13 = _12 + _5;
		   stmt into .UADDC with 0 carry-in just
		   on the first .ADD_OVERFLOW call and
		   another replacing the _12 and _13
		if (gimple_call_internal_p (use_stmt, ifn))
		  ovf_lhs = NULL_TREE;
	      || gimple_call_internal_p (ovf,
					 ? IFN_UADDC : IFN_USUBC))
	      && (optab_handler (code == PLUS_EXPR
				 ? uaddc5_optab : usubc5_optab,
		  != CODE_FOR_nothing))
	    /* And in that case build another .UADDC/.USUBC
	       call for the most significand limb addition.
	       Overflow bit is ignored here.  */
	      std::swap (rhs[i], rhs[2]);
	      = gimple_build_call_internal (code == PLUS_EXPR
	    tree nlhs = make_ssa_name (build_complex_type (type));
	    gimple_call_set_lhs (g, nlhs);
	    gsi_insert_before (gsi, g, GSI_SAME_STMT);
	    tree ilhs = gimple_assign_lhs (stmt);
	    g = gimple_build_assign (ilhs, REALPART_EXPR,
				     build1 (REALPART_EXPR,
	    gsi_replace (gsi, g, true);
	    /* And if it is initialized from result of __imag__
	       of .{ADD,SUB}_OVERFLOW call, replace that
	       call with .U{ADD,SUB}C call with the same arguments,
	       just 0 added as third argument.  This isn't strictly
	       necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
	       produce the same result, but may result in better
	       generated code on some targets where the backend can
	       better prepare in how the result will be used.  */
	      tree zero = build_zero_cst (type);
	      g = gimple_build_call_internal (code == PLUS_EXPR
	      gimple_call_set_lhs (g, ovf_lhs);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf);
	      gsi_replace (&gsi2, g, true);
  if (code == MINUS_EXPR && !rhs[2])
  if (code == MINUS_EXPR)
    /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
       So, for MINUS_EXPR swap the single added rhs operand (others are
       subtracted) to rhs[3].  */
    std::swap (rhs[0], rhs[3]);
  /* Walk from both operands of STMT (for +/- even sometimes from
     all the 4 addends or 3 subtrahends), see through casts and != 0
     statements which would preserve [0, 1] range of values and
     check which is initialized from __imag__.  */
  gimple *im1 = NULL, *im2 = NULL;
  for (int i = 0; i < (code == MINUS_EXPR ? 3 : 4); i++)
    if (rhs[i] && TREE_CODE (rhs[i]) == SSA_NAME)
      gimple *im = uaddc_cast (SSA_NAME_DEF_STMT (rhs[i]));
      im = uaddc_ne0 (im);
      if (uaddc_is_cplxpart (im, IMAGPART_EXPR))
	  std::swap (rhs[0], rhs[i]);
	  std::swap (rhs[1], rhs[i]);
  /* If we don't find at least two, punt.  */
  /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
     either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
     uaddc5/usubc5 named pattern for the corresponding mode.  */
    = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1), 0));
    = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2), 0));
  if (!is_gimple_call (ovf1)
      || !gimple_call_internal_p (ovf1)
      || ((ifn = gimple_call_internal_fn (ovf1)) != IFN_ADD_OVERFLOW
	  && ifn != IFN_SUB_OVERFLOW)
      || !gimple_call_internal_p (ovf2, ifn)
      || optab_handler (ifn == IFN_ADD_OVERFLOW ? uaddc5_optab : usubc5_optab,
			TYPE_MODE (type)) == CODE_FOR_nothing
	  && optab_handler (code == PLUS_EXPR ? uaddc5_optab : usubc5_optab,
			    TYPE_MODE (type)) == CODE_FOR_nothing))
  tree arg1, arg2, arg3 = NULL_TREE;
  gimple *re1 = NULL, *re2 = NULL;
  /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
     should be initialized from __real__ of the other of the two calls.
     Though, for .SUB_OVERFLOW, it has to be the first argument, not the
  for (int i = (ifn == IFN_ADD_OVERFLOW ? 1 : 0); i >= 0; --i)
    for (gimple *ovf = ovf1; ovf; ovf = (ovf == ovf1 ? ovf2 : NULL))
      tree arg = gimple_call_arg (ovf, i);
      if (TREE_CODE (arg) != SSA_NAME)
      re1 = SSA_NAME_DEF_STMT (arg);
      if (uaddc_is_cplxpart (re1, REALPART_EXPR)
	  && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1), 0))
	      == (ovf == ovf1 ? ovf2 : ovf1)))
	  /* Make sure ovf2 is the .*_OVERFLOW call with argument
	     initialized from __real__ of ovf1.  */
	  std::swap (rhs[0], rhs[1]);
	  std::swap (im1, im2);
	  std::swap (ovf1, ovf2);
	arg3 = gimple_call_arg (ovf, 1 - i);
  arg1 = gimple_call_arg (ovf1, 0);
  arg2 = gimple_call_arg (ovf1, 1);
  if (!types_compatible_p (type, TREE_TYPE (arg1)))
  int kind[2] = { 0, 0 };
  tree arg_im[2] = { NULL_TREE, NULL_TREE };
  /* At least one of arg2 and arg3 should have type compatible
     with arg1/rhs[0], and the other one should have value in [0, 1]
     range.  If both are in [0, 1] range and type compatible with
     arg1/rhs[0], try harder to find after looking through casts,
     != 0 comparisons which one is initialized to __imag__ of
     .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results.  */
  for (int i = 0; i < 2; ++i)
    tree arg = i == 0 ? arg2 : arg3;
    if (types_compatible_p (type, TREE_TYPE (arg)))
    if (!INTEGRAL_TYPE_P (TREE_TYPE (arg))
	|| (TYPE_PRECISION (TREE_TYPE (arg)) == 1
	    && !TYPE_UNSIGNED (TREE_TYPE (arg))))
    if (tree_zero_one_valued_p (arg))
    if (TREE_CODE (arg) == SSA_NAME)
      gimple *g = SSA_NAME_DEF_STMT (arg);
      if (gimple_assign_cast_p (g))
	tree op = gimple_assign_rhs1 (g);
	if (TREE_CODE (op) == SSA_NAME
	    && INTEGRAL_TYPE_P (TREE_TYPE (op)))
	  g = SSA_NAME_DEF_STMT (op);
      if (!uaddc_is_cplxpart (g, IMAGPART_EXPR))
      arg_im[i] = gimple_assign_lhs (g);
      g = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g), 0));
      if (!is_gimple_call (g) || !gimple_call_internal_p (g))
      switch (gimple_call_internal_fn (g))
	case IFN_ADD_OVERFLOW:
	case IFN_SUB_OVERFLOW:
  /* Make arg2 the one with compatible type and arg3 the one
     with [0, 1] range.  If both is true for both operands,
     prefer as arg3 result of __imag__ of some ifn.  */
  if ((kind[0] & 1) == 0 || ((kind[1] & 1) != 0 && kind[0] > kind[1]))
    std::swap (arg2, arg3);
    std::swap (kind[0], kind[1]);
    std::swap (arg_im[0], arg_im[1]);
  if ((kind[0] & 1) == 0 || (kind[1] & 6) == 0)
  if (!has_single_use (gimple_assign_lhs (im1))
      || !has_single_use (gimple_assign_lhs (im2))
      || !has_single_use (gimple_assign_lhs (re1))
      || num_imm_uses (gimple_call_lhs (ovf1)) != 2)
  /* Check that ovf2's result is used in __real__ and set re2
     to that statement.  */
  use_operand_p use_p;
  imm_use_iterator iter;
  tree lhs = gimple_call_lhs (ovf2);
  FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
    gimple *use_stmt = USE_STMT (use_p);
    if (is_gimple_debug (use_stmt))
    if (use_stmt == im2)
    if (!uaddc_is_cplxpart (use_stmt, REALPART_EXPR))
  /* Build .UADDC/.USUBC call which will be placed before the stmt.  */
  gimple_stmt_iterator gsi2 = gsi_for_stmt (ovf2);
  if ((kind[1] & 4) != 0 && types_compatible_p (type, TREE_TYPE (arg_im[1])))
  if ((kind[1] & 1) == 0)
    if (TREE_CODE (arg3) == INTEGER_CST)
      arg3 = fold_convert (type, arg3);
      g = gimple_build_assign (make_ssa_name (type), NOP_EXPR, arg3);
      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
      arg3 = gimple_assign_lhs (g);
  g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
				  ? IFN_UADDC : IFN_USUBC,
				  3, arg1, arg2, arg3);
  tree nlhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_call_set_lhs (g, nlhs);
  gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
  /* In the case where stmt is | or ^ of two overflow flags
     or addition of those, replace stmt with __imag__ of the above
     added call.  In case of arg1 + arg2 + (ovf1 + ovf2) or
     arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt.  */
  tree ilhs = rhs[2] ? make_ssa_name (type) : gimple_assign_lhs (stmt);
  g = gimple_build_assign (ilhs, IMAGPART_EXPR,
			   build1 (IMAGPART_EXPR, TREE_TYPE (ilhs), nlhs));
    gsi_insert_before (gsi, g, GSI_SAME_STMT);
  /* Remove some further statements which can't be kept in the IL because
     they can use SSA_NAMEs whose setter is going to be removed too.  */
  for (gimple *g2 : temp_stmts)
    gsi2 = gsi_for_stmt (g2);
    gsi_remove (&gsi2, true);
    gsi_replace (gsi, g, true);
  /* Remove some statements which can't be kept in the IL because they
     use SSA_NAME whose setter is going to be removed too.  */
  for (int i = 0; i < 2; i++)
    if (rhs1 == gimple_assign_lhs (im2))
      g = SSA_NAME_DEF_STMT (rhs1);
      rhs1 = gimple_assign_rhs1 (g);
      gsi2 = gsi_for_stmt (g);
      gsi_remove (&gsi2, true);
    gcc_checking_assert (rhs1 == gimple_assign_lhs (im2));
    gsi2 = gsi_for_stmt (im2);
    gsi_remove (&gsi2, true);
  /* Replace the re2 statement with __real__ of the newly added
     .UADDC/.USUBC call.  */
    gsi2 = gsi_for_stmt (re2);
    tree rlhs = gimple_assign_lhs (re2);
    g = gimple_build_assign (rlhs, REALPART_EXPR,
			     build1 (REALPART_EXPR, TREE_TYPE (rlhs), nlhs));
    gsi_replace (&gsi2, g, true);
  /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
     arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
     replace stmt with __real__ of another .UADDC/.USUBC call which
     handles the most significant limb.  Overflow flag from this is
    g = gimple_build_call_internal (code == PLUS_EXPR
				    ? IFN_UADDC : IFN_USUBC,
				    3, rhs[3], rhs[2], ilhs);
    nlhs = make_ssa_name (TREE_TYPE (lhs));
    gimple_call_set_lhs (g, nlhs);
    gsi_insert_before (gsi, g, GSI_SAME_STMT);
    ilhs = gimple_assign_lhs (stmt);
    g = gimple_build_assign (ilhs, REALPART_EXPR,
			     build1 (REALPART_EXPR, TREE_TYPE (ilhs), nlhs));
    gsi_replace (gsi, g, true);
  if (TREE_CODE (arg3) == SSA_NAME)
    /* When pattern recognizing the second least significant limb
       above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
       check if the [0, 1] range argument (i.e. carry in) isn't the
       result of another .{ADD,SUB}_OVERFLOW call (one handling the
       least significant limb).  Again look through casts and != 0.  */
    gimple *im3 = SSA_NAME_DEF_STMT (arg3);
    for (int i = 0; i < 2; ++i)
      gimple *im4 = uaddc_cast (im3);
      im3 = uaddc_ne0 (im3);
      if (uaddc_is_cplxpart (im3, IMAGPART_EXPR))
	= SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3), 0));
    if (gimple_call_internal_p (ovf3, ifn))
      lhs = gimple_call_lhs (ovf3);
      arg1 = gimple_call_arg (ovf3, 0);
      arg2 = gimple_call_arg (ovf3, 1);
      if (types_compatible_p (type, TREE_TYPE (TREE_TYPE (lhs)))
	  && types_compatible_p (type, TREE_TYPE (arg1))
	  && types_compatible_p (type, TREE_TYPE (arg2)))
	/* And if it is initialized from result of __imag__
	   of .{ADD,SUB}_OVERFLOW call, replace that
	   call with .U{ADD,SUB}C call with the same arguments,
	   just 0 added as third argument.  This isn't strictly
	   necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
	   produce the same result, but may result in better
	   generated code on some targets where the backend can
	   better prepare in how the result will be used.  */
	g = gimple_build_call_internal (ifn == IFN_ADD_OVERFLOW
					? IFN_UADDC : IFN_USUBC,
					build_zero_cst (type));
	gimple_call_set_lhs (g, lhs);
	gsi2 = gsi_for_stmt (ovf3);
	gsi_replace (&gsi2, g, true);
/* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
   (x ^ (x - 1)) > x - 1 or (x ^ (x - 1)) <= x - 1 if .POPCOUNT
   isn't a direct optab.  Also handle `<=`/`>` (i.e. .POPCOUNT (x) <= 1
   or .POPCOUNT (x) > 1) as `(x & (x - 1)) ==/!= 0`.  */
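/* Illustrative example, not taken from the GCC sources: the single-bit
   test this function rewrites typically comes from C such as

     int is_power_of_two (unsigned x)
     {
       return __builtin_popcount (x) == 1;
     }

   which, without a cheap popcount instruction, can instead be tested as
   (x ^ (x - 1)) > x - 1, or as (x & (x - 1)) == 0 when x is known to be
   nonzero.  */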
match_single_bit_test (gimple_stmt_iterator *gsi, gimple *stmt)
  enum tree_code code;
  bool was_le = false;
  if (gimple_code (stmt) == GIMPLE_COND)
    clhs = gimple_cond_lhs (stmt);
    crhs = gimple_cond_rhs (stmt);
    code = gimple_cond_code (stmt);
    clhs = gimple_assign_rhs1 (stmt);
    crhs = gimple_assign_rhs2 (stmt);
    code = gimple_assign_rhs_code (stmt);
  if (code != LE_EXPR && code != GT_EXPR
      && code != EQ_EXPR && code != NE_EXPR)
  if (code == LE_EXPR || code == GT_EXPR)
  if (TREE_CODE (clhs) != SSA_NAME || !integer_onep (crhs))
  gimple *call = SSA_NAME_DEF_STMT (clhs);
  combined_fn cfn = gimple_call_combined_fn (call);
  if (!has_single_use (clhs))
  tree arg = gimple_call_arg (call, 0);
  tree type = TREE_TYPE (arg);
  if (!INTEGRAL_TYPE_P (type))
  bool nonzero_arg = tree_expr_nonzero_p (arg);
  if (direct_internal_fn_supported_p (IFN_POPCOUNT, type, OPTIMIZE_FOR_BOTH))
    /* Tell expand_POPCOUNT the popcount result is only used in equality
       comparison with one, so that it can decide based on rtx costs.  */
    gimple *g = gimple_build_call_internal (IFN_POPCOUNT, 2, arg,
					    was_le ? integer_minus_one_node
					    : (nonzero_arg ? integer_zero_node
					       : integer_one_node));
    gimple_call_set_lhs (g, gimple_call_lhs (call));
    gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
    gsi_replace (&gsi2, g, true);
  tree argm1 = make_ssa_name (type);
  gimple *g = gimple_build_assign (argm1, PLUS_EXPR, arg,
				   build_int_cst (type, -1));
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
  g = gimple_build_assign (make_ssa_name (type),
			   (nonzero_arg || was_le) ? BIT_AND_EXPR : BIT_XOR_EXPR,
  gsi_insert_before (gsi, g, GSI_SAME_STMT);
    argm1 = build_zero_cst (type);
    cmpcode = code == LE_EXPR ? EQ_EXPR : NE_EXPR;
  else if (nonzero_arg)
    argm1 = build_zero_cst (type);
    cmpcode = code == EQ_EXPR ? GT_EXPR : LE_EXPR;
  if (gcond *cond = dyn_cast <gcond *> (stmt))
    gimple_cond_set_lhs (cond, gimple_assign_lhs (g));
    gimple_cond_set_rhs (cond, argm1);
    gimple_cond_set_code (cond, cmpcode);
    gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (g));
    gimple_assign_set_rhs2 (stmt, argm1);
    gimple_assign_set_rhs_code (stmt, cmpcode);
  gimple_stmt_iterator gsi2 = gsi_for_stmt (call);
  gsi_remove (&gsi2, true);
  release_defs (call);
/* Return true if target has support for divmod.  */

static bool
target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
{
  /* If target supports hardware divmod insn, use it for divmod.  */
  if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
    return true;

  /* Check if libfunc for divmod is available.  */
  rtx libfunc = optab_libfunc (divmod_optab, mode);
  if (libfunc != NULL_RTX)
    {
      /* If optab_handler exists for div_optab, perhaps in a wider mode,
	 we don't want to use the libfunc even if it exists for given mode.  */
      machine_mode div_mode;
      FOR_EACH_MODE_FROM (div_mode, mode)
	if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
	  return false;

      return targetm.expand_divmod_libfunc != NULL;
    }

  return false;
}
/* Check if stmt is candidate for divmod transform.  */

static bool
divmod_candidate_p (gassign *stmt)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  machine_mode mode = TYPE_MODE (type);
  optab divmod_optab, div_optab;

  if (TYPE_UNSIGNED (type))
    {
      divmod_optab = udivmod_optab;
      div_optab = udiv_optab;
    }
  else
    {
      divmod_optab = sdivmod_optab;
      div_optab = sdiv_optab;
    }

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  /* Disable the transform if either is a constant, since
     division-by-constant may have specialized expansion.  */
  if (CONSTANT_CLASS_P (op1))
    return false;

  if (CONSTANT_CLASS_P (op2))
    {
      if (integer_pow2p (op2))
	return false;

      if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
	  && element_precision (type) <= BITS_PER_WORD)
	return false;

      /* If the divisor is not power of 2 and the precision wider than
	 HWI, expand_divmod punts on that, so in that case it is better
	 to use divmod optab or libfunc.  Similarly if choose_multiplier
	 might need pre/post shifts of BITS_PER_WORD or more.  */
    }

  /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
     expand using the [su]divv optabs.  */
  if (TYPE_OVERFLOW_TRAPS (type))
    return false;

  if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
    return false;

  return true;
}
/* This function looks for:
   t1 = a TRUNC_DIV_EXPR b;
   t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
   complex_tmp = DIVMOD (a, b);
   t1 = REALPART_EXPR (complex_tmp);
   t2 = IMAGPART_EXPR (complex_tmp);
   For conditions enabling the transform see divmod_candidate_p().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */
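/* Illustrative example, not taken from the GCC sources: a hypothetical
   function computing both quotient and remainder of the same operands,
   which is the shape this transform turns into a single DIVMOD call:

     void div_and_mod (int a, int b, int *q, int *r)
     {
       *q = a / b;
       *r = a % b;
     }
*/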
convert_to_divmod (gassign *stmt)
  if (stmt_can_throw_internal (cfun, stmt)
      || !divmod_candidate_p (stmt))

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);
  imm_use_iterator use_iter;
  auto_vec<gimple *> stmts;

  gimple *top_stmt = stmt;
  basic_block top_bb = gimple_bb (stmt);

  /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
     at-least stmt and possibly other trunc_div/trunc_mod stmts
     having same operands as stmt.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, op1)
    if (is_gimple_assign (use_stmt)
	&& (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	    || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	&& operand_equal_p (op1, gimple_assign_rhs1 (use_stmt), 0)
	&& operand_equal_p (op2, gimple_assign_rhs2 (use_stmt), 0))
      if (stmt_can_throw_internal (cfun, use_stmt))
      basic_block bb = gimple_bb (use_stmt);
	if (gimple_uid (use_stmt) < gimple_uid (top_stmt))
	  top_stmt = use_stmt;
      else if (dominated_by_p (CDI_DOMINATORS, top_bb, bb))
	top_stmt = use_stmt;

  tree top_op1 = gimple_assign_rhs1 (top_stmt);
  tree top_op2 = gimple_assign_rhs2 (top_stmt);

  stmts.safe_push (top_stmt);
  bool div_seen = (gimple_assign_rhs_code (top_stmt) == TRUNC_DIV_EXPR);

  /* Part 2: Add all trunc_div/trunc_mod statements dominated by top_bb
     to stmts vector.  The 2nd loop will always add stmt to stmts vector, since
     gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
     2nd loop ends up adding at-least single trunc_mod_expr stmt.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, top_op1)
    if (is_gimple_assign (use_stmt)
	&& (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR
	    || gimple_assign_rhs_code (use_stmt) == TRUNC_MOD_EXPR)
	&& operand_equal_p (top_op1, gimple_assign_rhs1 (use_stmt), 0)
	&& operand_equal_p (top_op2, gimple_assign_rhs2 (use_stmt), 0))
      if (use_stmt == top_stmt
	  || stmt_can_throw_internal (cfun, use_stmt)
	  || !dominated_by_p (CDI_DOMINATORS, gimple_bb (use_stmt), top_bb))
      stmts.safe_push (use_stmt);
      if (gimple_assign_rhs_code (use_stmt) == TRUNC_DIV_EXPR)

  /* Part 3: Create libcall to internal fn DIVMOD:
     divmod_tmp = DIVMOD (op1, op2).  */
  gcall *call_stmt = gimple_build_call_internal (IFN_DIVMOD, 2, op1, op2);
  tree res = make_temp_ssa_name (build_complex_type (TREE_TYPE (op1)),
				 call_stmt, "divmod_tmp");
  gimple_call_set_lhs (call_stmt, res);
  /* We rejected throwing statements above.  */
  gimple_call_set_nothrow (call_stmt, true);

  /* Insert the call before top_stmt.  */
  gimple_stmt_iterator top_stmt_gsi = gsi_for_stmt (top_stmt);
  gsi_insert_before (&top_stmt_gsi, call_stmt, GSI_SAME_STMT);

  widen_mul_stats.divmod_calls_inserted++;

  /* Update all statements in stmts vector:
     lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
     lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>.  */
  for (unsigned i = 0; stmts.iterate (i, &use_stmt); ++i)
    switch (gimple_assign_rhs_code (use_stmt))
      case TRUNC_DIV_EXPR:
	new_rhs = fold_build1 (REALPART_EXPR, TREE_TYPE (op1), res);
      case TRUNC_MOD_EXPR:
	new_rhs = fold_build1 (IMAGPART_EXPR, TREE_TYPE (op1), res);
    gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt);
    gimple_assign_set_rhs_from_tree (&gsi, new_rhs);
    update_stmt (use_stmt);
/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
   its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
   value is true iff we converted the statement.  */
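/* Illustrative example, not taken from the GCC sources: assuming 32-bit
   int and 64-bit long long, the widening multiply plus shift below is the
   shape that becomes MULT_HIGHPART_EXPR when the target provides a
   [us]mul_highpart pattern (hypothetical function):

     int mul_high (int a, int b)
     {
       return (int) (((long long) a * b) >> 32);
     }
*/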
convert_mult_to_highpart (gassign *stmt, gimple_stmt_iterator *gsi)
  tree lhs = gimple_assign_lhs (stmt);
  tree stype = TREE_TYPE (lhs);
  tree sarg0 = gimple_assign_rhs1 (stmt);
  tree sarg1 = gimple_assign_rhs2 (stmt);

  if (TREE_CODE (stype) != INTEGER_TYPE
      || TREE_CODE (sarg1) != INTEGER_CST
      || TREE_CODE (sarg0) != SSA_NAME
      || !tree_fits_uhwi_p (sarg1)
      || !has_single_use (sarg0))

  gassign *def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (sarg0));

  enum tree_code mcode = gimple_assign_rhs_code (def);
  if (mcode == NOP_EXPR)
    tree tmp = gimple_assign_rhs1 (def);
    if (TREE_CODE (tmp) != SSA_NAME || !has_single_use (tmp))
    def = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (tmp));
    mcode = gimple_assign_rhs_code (def);

  if (mcode != WIDEN_MULT_EXPR
      || gimple_bb (def) != gimple_bb (stmt))
  tree mtype = TREE_TYPE (gimple_assign_lhs (def));
  if (TREE_CODE (mtype) != INTEGER_TYPE
      || TYPE_PRECISION (mtype) != TYPE_PRECISION (stype))

  tree mop1 = gimple_assign_rhs1 (def);
  tree mop2 = gimple_assign_rhs2 (def);
  tree optype = TREE_TYPE (mop1);
  bool unsignedp = TYPE_UNSIGNED (optype);
  unsigned int prec = TYPE_PRECISION (optype);

  if (unsignedp != TYPE_UNSIGNED (mtype)
      || TYPE_PRECISION (mtype) != 2 * prec)

  unsigned HOST_WIDE_INT bits = tree_to_uhwi (sarg1);
  if (bits < prec || bits >= 2 * prec)

  /* For the time being, require operands to have the same sign.  */
  if (unsignedp != TYPE_UNSIGNED (TREE_TYPE (mop2)))

  machine_mode mode = TYPE_MODE (optype);
  optab tab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
  if (optab_handler (tab, mode) == CODE_FOR_nothing)

  location_t loc = gimple_location (stmt);
  tree highpart1 = build_and_insert_binop (gsi, loc, "highparttmp",
					   MULT_HIGHPART_EXPR, mop1, mop2);
  tree highpart2 = highpart1;
  tree ntype = optype;

  if (TYPE_UNSIGNED (stype) != TYPE_UNSIGNED (optype))
    ntype = TYPE_UNSIGNED (stype) ? unsigned_type_for (optype)
				  : signed_type_for (optype);
    highpart2 = build_and_insert_cast (gsi, loc, ntype, highpart1);
    highpart2 = build_and_insert_binop (gsi, loc, "highparttmp",
					RSHIFT_EXPR, highpart2,
					build_int_cst (ntype, bits - prec));

  gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, highpart2);
  gsi_replace (gsi, new_stmt, true);

  widen_mul_stats.highpart_mults_inserted++;
/* If target has spaceship<MODE>3 expander, pattern recognize
   <bb 2> [local count: 1073741824]:
   if (a_2(D) == b_3(D))
     goto <bb 6>; [34.00%]
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (a_2(D) < b_3(D))
     goto <bb 6>; [1.04%]
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (a_2(D) > b_3(D))
     goto <bb 5>; [48.89%]
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   =>
   <bb 2> [local count: 1073741824]:
   _1 = .SPACESHIP (a_2(D), b_3(D), 0);
     goto <bb 6>; [34.00%]
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
     goto <bb 6>; [1.04%]
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
     goto <bb 5>; [48.89%]
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:

   <bb 6> [local count: 1073741824]:
   so that the backend can emit optimal comparison and
   conditional jump sequence.  If the
   <bb 6> [local count: 1073741824]:
   above has a single PHI like:
   # _27 = PHI<0(2), -1(3), 2(4), 1(5)>
   then replace it with effectively
   _1 = .SPACESHIP (a_2(D), b_3(D), 1);
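/* Illustrative example, not taken from the GCC sources: C++20 code such as

     #include <compare>

     std::partial_ordering cmp (double a, double b)
     {
       return a <=> b;
     }

   is typically lowered by the library into the ==/</> comparison chain and
   -1/0/1/2 PHI sketched above, which this function collapses into a single
   .SPACESHIP call when the target has a spaceship<MODE>3 expander.  */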
optimize_spaceship (gcond *stmt)
  enum tree_code code = gimple_cond_code (stmt);
  if (code != EQ_EXPR && code != NE_EXPR)
  tree arg1 = gimple_cond_lhs (stmt);
  tree arg2 = gimple_cond_rhs (stmt);
  if ((!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1))
       && !INTEGRAL_TYPE_P (TREE_TYPE (arg1)))
      || optab_handler (spaceship_optab,
			TYPE_MODE (TREE_TYPE (arg1))) == CODE_FOR_nothing
      || operand_equal_p (arg1, arg2, 0))

  basic_block bb0 = gimple_bb (stmt), bb1, bb2 = NULL;
  edge em1 = NULL, e1 = NULL, e2 = NULL;
  bb1 = EDGE_SUCC (bb0, 1)->dest;
  if (((EDGE_SUCC (bb0, 0)->flags & EDGE_TRUE_VALUE) != 0) ^ (code == EQ_EXPR))
    bb1 = EDGE_SUCC (bb0, 0)->dest;

  gcond *g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb1));
      || !single_pred_p (bb1)
      || (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
	  ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
	  : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
	     || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
      || !cond_only_block_p (bb1))

  enum tree_code ccode = (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
			  ? LT_EXPR : GT_EXPR);
  switch (gimple_cond_code (g))
      ccode = ccode == LT_EXPR ? GT_EXPR : LT_EXPR;

  for (int i = 0; i < 2; ++i)
    /* With NaNs, </<=/>/>= are false, so we need to look for the
       third comparison on the false edge from whatever non-equality
       comparison the second comparison is.  */
    if (HONOR_NANS (TREE_TYPE (arg1))
	&& (EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0)

    bb2 = EDGE_SUCC (bb1, i)->dest;
    g = safe_dyn_cast <gcond *> (*gsi_last_bb (bb2));
	|| !single_pred_p (bb2)
	|| (operand_equal_p (gimple_cond_lhs (g), arg1, 0)
	    ? !operand_equal_p (gimple_cond_rhs (g), arg2, 0)
	    : (!operand_equal_p (gimple_cond_lhs (g), arg2, 0)
	       || !operand_equal_p (gimple_cond_rhs (g), arg1, 0)))
	|| !cond_only_block_p (bb2)
	|| EDGE_SUCC (bb2, 0)->dest == EDGE_SUCC (bb2, 1)->dest)

    enum tree_code ccode2
      = (operand_equal_p (gimple_cond_lhs (g), arg1, 0) ? LT_EXPR : GT_EXPR);
    switch (gimple_cond_code (g))
	ccode2 = ccode2 == LT_EXPR ? GT_EXPR : LT_EXPR;
    if (HONOR_NANS (TREE_TYPE (arg1)) && ccode == ccode2)

    if ((ccode == LT_EXPR)
	^ ((EDGE_SUCC (bb1, i)->flags & EDGE_TRUE_VALUE) != 0))
      em1 = EDGE_SUCC (bb1, 1 - i);
      e1 = EDGE_SUCC (bb2, 0);
      e2 = EDGE_SUCC (bb2, 1);
      if ((ccode2 == LT_EXPR) ^ ((e1->flags & EDGE_TRUE_VALUE) == 0))
      e1 = EDGE_SUCC (bb1, 1 - i);
      em1 = EDGE_SUCC (bb2, 0);
      e2 = EDGE_SUCC (bb2, 1);
      if ((ccode2 != LT_EXPR) ^ ((em1->flags & EDGE_TRUE_VALUE) == 0))
	std::swap (em1, e2);

    if ((ccode == LT_EXPR)
	^ ((EDGE_SUCC (bb1, 0)->flags & EDGE_TRUE_VALUE) != 0))
      em1 = EDGE_SUCC (bb1, 1);
      e1 = EDGE_SUCC (bb1, 0);
      e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;
      em1 = EDGE_SUCC (bb1, 0);
      e1 = EDGE_SUCC (bb1, 1);
      e2 = (e1->flags & EDGE_TRUE_VALUE) ? em1 : e1;

  /* Check if there is a single bb into which all failed conditions
     jump to (perhaps through an empty block) and if it results in
     a single integral PHI which just sets it to -1, 0, 1, X
     (or -1, 0, 1 when NaNs can't happen).  In that case use 1 rather
     than 0 as last .SPACESHIP argument to tell backends it might
     consider different code generation and just cast the result
     of .SPACESHIP to the PHI result.  X above is some value
     other than -1, 0, 1, for libstdc++ 2, for libc++ -127.  */
  tree arg3 = integer_zero_node;
  edge e = EDGE_SUCC (bb0, 0);
    e = EDGE_SUCC (bb0, 1);
  basic_block bbp = e->dest;
  for (gphi_iterator psi = gsi_start_phis (bbp);
       !gsi_end_p (psi); gsi_next (&psi))
    gphi *gp = psi.phi ();
    tree res = gimple_phi_result (gp);
	|| virtual_operand_p (res)
	|| !INTEGRAL_TYPE_P (TREE_TYPE (res))
	|| TYPE_PRECISION (TREE_TYPE (res)) < 2)
      && integer_zerop (gimple_phi_arg_def_from_edge (phi, e))
      && EDGE_COUNT (bbp->preds) == (HONOR_NANS (TREE_TYPE (arg1)) ? 4 : 3))
    HOST_WIDE_INT argval = SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) ? 2 : -1;
    for (unsigned i = 0; phi && i < EDGE_COUNT (bbp->preds) - 1; ++i)
      edge e3 = i == 0 ? e1 : i == 1 ? em1 : e2;
      if (e3->dest != bbp)
	if (!empty_block_p (e3->dest)
	    || !single_succ_p (e3->dest)
	    || single_succ (e3->dest) != bbp)
	e3 = single_succ_edge (e3->dest);
      tree a = gimple_phi_arg_def_from_edge (phi, e3);
      if (TREE_CODE (a) != INTEGER_CST
	  || (i == 0 && !integer_onep (a))
	  || (i == 1 && !integer_all_onesp (a)))
	tree minv = TYPE_MIN_VALUE (signed_char_type_node);
	tree maxv = TYPE_MAX_VALUE (signed_char_type_node);
	widest_int w = widest_int::from (wi::to_wide (a), SIGNED);
	if ((w >= -1 && w <= 1)
	    || w < wi::to_widest (minv)
	    || w > wi::to_widest (maxv))
	argval = w.to_shwi ();
    arg3 = build_int_cst (integer_type_node,
			  TYPE_UNSIGNED (TREE_TYPE (arg1)) ? 1 : argval);

  /* For integral <=> comparisons only use .SPACESHIP if it is turned
     into an integer (-1, 0, 1).  */
  if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1)) && arg3 == integer_zero_node)

  gcall *gc = gimple_build_call_internal (IFN_SPACESHIP, 3, arg1, arg2, arg3);
  tree lhs = make_ssa_name (integer_type_node);
  gimple_call_set_lhs (gc, lhs);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gsi_insert_before (&gsi, gc, GSI_SAME_STMT);

  wide_int wmin = wi::minus_one (TYPE_PRECISION (integer_type_node));
  wide_int wmax = wi::one (TYPE_PRECISION (integer_type_node));
  if (HONOR_NANS (TREE_TYPE (arg1)))
    if (arg3 == integer_zero_node)
      wmax = wi::two (TYPE_PRECISION (integer_type_node));
    else if (tree_int_cst_sgn (arg3) < 0)
      wmin = wi::to_wide (arg3);
      wmax = wi::to_wide (arg3);
  int_range<1> vr (TREE_TYPE (lhs), wmin, wmax);
  set_range_info (lhs, vr);

  if (arg3 != integer_zero_node)
    tree type = TREE_TYPE (gimple_phi_result (phi));
    if (!useless_type_conversion_p (type, integer_type_node))
      tree tem = make_ssa_name (type);
      gimple *gcv = gimple_build_assign (tem, NOP_EXPR, lhs);
      gsi_insert_before (&gsi, gcv, GSI_SAME_STMT);
    SET_PHI_ARG_DEF_ON_EDGE (phi, e, lhs);
    gimple_cond_set_lhs (stmt, boolean_false_node);
    gimple_cond_set_rhs (stmt, boolean_false_node);
    gimple_cond_set_code (stmt, (e->flags & EDGE_TRUE_VALUE)
				? EQ_EXPR : NE_EXPR);

  gimple_cond_set_lhs (stmt, lhs);
  gimple_cond_set_rhs (stmt, integer_zero_node);

  gcond *cond = as_a <gcond *> (*gsi_last_bb (bb1));
  gimple_cond_set_lhs (cond, lhs);
  if (em1->src == bb1 && e2 != em1)
    gimple_cond_set_rhs (cond, integer_minus_one_node);
    gimple_cond_set_code (cond, (em1->flags & EDGE_TRUE_VALUE)
				? EQ_EXPR : NE_EXPR);
    gcc_assert (e1->src == bb1 && e2 != e1);
    gimple_cond_set_rhs (cond, integer_one_node);
    gimple_cond_set_code (cond, (e1->flags & EDGE_TRUE_VALUE)
				? EQ_EXPR : NE_EXPR);

  if (e2 != e1 && e2 != em1)
    cond = as_a <gcond *> (*gsi_last_bb (bb2));
    gimple_cond_set_lhs (cond, lhs);
    if (em1->src == bb2)
      gimple_cond_set_rhs (cond, integer_minus_one_node);
      gcc_assert (e1->src == bb2);
      gimple_cond_set_rhs (cond, integer_one_node);
    gimple_cond_set_code (cond,
			  (e2->flags & EDGE_TRUE_VALUE) ? NE_EXPR : EQ_EXPR);
/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   or MULT_HIGHPART_EXPR where appropriate.  */
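/* Illustrative example, not taken from the GCC sources: assuming 32-bit int
   and 64-bit long long, a multiplication of two sign-extended narrower
   operands such as

     long long wide_mul (int a, int b)
     {
       return (long long) a * b;
     }

   is the shape rewritten to WIDEN_MULT_EXPR when the target has a widening
   multiply instruction.  */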
const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
  {
    return flag_expensive_optimizations && optimize;
  }

  unsigned int execute (function *) final override;

}; // class pass_optimize_widening_mul
/* Walker class to perform the transformation in reverse dominance order.  */

class math_opts_dom_walker : public dom_walker
{
public:
  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
     if walking modifies the CFG.  */
  math_opts_dom_walker (bool *cfg_changed_p)
    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
      m_cfg_changed_p (cfg_changed_p) {}

  /* The actual actions performed in the walk.  */
  void after_dom_children (basic_block) final override;

  /* Set of results of chains of multiply and add statement combinations that
     were not transformed into FMAs because of active deferring.  */
  hash_set<tree> m_last_result_set;

  /* Pointer to a flag of the user that needs to be set if CFG has been
     modified.  */
  bool *m_cfg_changed_p;
};
void
math_opts_dom_walker::after_dom_children (basic_block bb)
{
  gimple_stmt_iterator gsi;

  fma_deferring_state fma_state (param_avoid_fma_max_bits > 0);

  for (gphi_iterator psi_next, psi = gsi_start_phis (bb); !gsi_end_p (psi);
       psi = psi_next)
    {
      psi_next = psi;
      gsi_next (&psi_next);

      gimple_stmt_iterator gsi = gsi_after_labels (bb);
      gphi *phi = psi.phi ();

      if (match_saturation_add (&gsi, phi)
	  || match_saturation_sub (&gsi, phi)
	  || match_saturation_trunc (&gsi, phi))
	remove_phi_node (&psi, /* release_lhs_p */ false);
    }

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
	{
	  code = gimple_assign_rhs_code (stmt);
	  switch (code)
	    {
	    case MULT_EXPR:
	      if (!convert_mult_to_widen (stmt, &gsi)
		  && !convert_expand_mult_copysign (stmt, &gsi)
		  && convert_mult_to_fma (stmt,
					  gimple_assign_rhs1 (stmt),
					  gimple_assign_rhs2 (stmt),
					  &fma_state))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
	      match_unsigned_saturation_sub (&gsi, as_a <gassign *> (stmt));
	      break;

	    case PLUS_EXPR:
	      match_unsigned_saturation_add (&gsi, as_a <gassign *> (stmt));
	      match_unsigned_saturation_sub (&gsi, as_a <gassign *> (stmt));
	      /* fall-through  */
	    case MINUS_EXPR:
	      if (!convert_plusminus_to_widen (&gsi, stmt, code))
		{
		  match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
		  if (gsi_stmt (gsi) == stmt)
		    match_uaddc_usubc (&gsi, stmt, code);
		}
	      break;

	    case BIT_NOT_EXPR:
	      if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
		continue;
	      break;

	    case TRUNC_MOD_EXPR:
	      convert_to_divmod (as_a <gassign *> (stmt));
	      break;

	    case RSHIFT_EXPR:
	      convert_mult_to_highpart (as_a <gassign *> (stmt), &gsi);
	      break;

	    case BIT_IOR_EXPR:
	      match_unsigned_saturation_add (&gsi, as_a <gassign *> (stmt));
	      match_unsigned_saturation_trunc (&gsi, as_a <gassign *> (stmt));
	      /* fall-through  */
	    case BIT_XOR_EXPR:
	      match_uaddc_usubc (&gsi, stmt, code);
	      break;

	    case EQ_EXPR:
	    case NE_EXPR:
	      match_single_bit_test (&gsi, stmt);
	      break;

	    case COND_EXPR:
	      match_unsigned_saturation_sub (&gsi, as_a <gassign *> (stmt));
	      break;

	    case NOP_EXPR:
	      match_unsigned_saturation_trunc (&gsi, as_a <gassign *> (stmt));
	      break;

	    default:;
	    }
	}
      else if (is_gimple_call (stmt))
	{
	  switch (gimple_call_combined_fn (stmt))
	    {
	    CASE_CFN_POW:
	      if (gimple_call_lhs (stmt)
		  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
		  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
				 &dconst2)
		  && convert_mult_to_fma (stmt,
					  gimple_call_arg (stmt, 0),
					  gimple_call_arg (stmt, 0),
					  &fma_state))
		{
		  unlink_stmt_vdef (stmt);
		  if (gsi_remove (&gsi, true)
		      && gimple_purge_dead_eh_edges (bb))
		    *m_cfg_changed_p = true;
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_COND_MUL:
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_COND_LEN_MUL:
	      if (convert_mult_to_fma (stmt,
				       gimple_call_arg (stmt, 1),
				       gimple_call_arg (stmt, 2),
				       &fma_state,
				       gimple_call_arg (stmt, 0),
				       gimple_call_arg (stmt, 4),
				       gimple_call_arg (stmt, 5)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  continue;
		}
	      break;

	    case CFN_LAST:
	      cancel_fma_deferring (&fma_state);
	      break;

	    default:
	      break;
	    }
	}
      else if (gimple_code (stmt) == GIMPLE_COND)
	{
	  match_single_bit_test (&gsi, stmt);
	  optimize_spaceship (as_a <gcond *> (stmt));
	}
      gsi_next (&gsi);
    }

  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
						 &m_last_result_set))
	cancel_fma_deferring (&fma_state);
      else
	m_last_result_set.add (fma_state.m_last_result);
    }
}
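/* Illustrative sketch (variable names invented): one form the unsigned
   saturation matchers above look for is the branchless saturating add

     unsigned sum = x + y;
     unsigned res = sum | -(unsigned) (sum < x);

   which, on targets providing the corresponding optab, can be rewritten as a
   call to the internal function, conceptually

     res = .SAT_ADD (x, y);

   The PHI-based matchers handle the equivalent branchy forms, which is why
   the walk above also iterates over the PHI nodes of each block.  */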
unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  renumber_gimple_stmt_uids (cfun);

  math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  statistics_counter_event (fun, "widening multiplications inserted",
			    widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
			    widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
			    widen_mul_stats.fmas_inserted);
  statistics_counter_event (fun, "divmod calls inserted",
			    widen_mul_stats.divmod_calls_inserted);
  statistics_counter_event (fun, "highpart multiplications inserted",
			    widen_mul_stats.highpart_mults_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
} // anon namespace

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}