/* Global, SSA-based optimizations using mathematical identities.
   Copyright (C) 2005-2025 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* Currently, the only mini-pass in this file tries to CSE reciprocal
   operations.  These are common in sequences such as this one:

	modulus = sqrt(x*x + y*y + z*z);
	x = x / modulus;
	y = y / modulus;
	z = z / modulus;

   that can be optimized to

	modulus = sqrt(x*x + y*y + z*z);
	rmodulus = 1.0 / modulus;
	x = x * rmodulus;
	y = y * rmodulus;
	z = z * rmodulus;

   We do this for loop invariant divisors, and with this pass whenever
   we notice that a division has the same divisor multiple times.

   Of course, like in PRE, we don't insert a division if a dominator
   already has one.  However, this cannot be done as an extension of
   PRE for several reasons.

   First of all, experiments found that the transformation is not always
   useful if there are only two divisions by the same divisor.  This is
   probably because modern processors can pipeline the divisions; on
   older, in-order processors it should still be effective to optimize
   two divisions by the same number.  We make this a param, and it shall
   be called N in the remainder of this comment.

   Second, if trapping math is active, we have less freedom on where
   to insert divisions: we can only do so in basic blocks that already
   contain one.  (If divisions don't trap, we can instead insert them
   elsewhere, namely in blocks that are common dominators of those
   that have the division.)

   We really don't want to compute the reciprocal unless a division will
   be found.  To do this, we won't insert the division in a basic block
   that has fewer than N divisions *post-dominating* it.

   The algorithm constructs a subset of the dominator tree, holding the
   blocks containing the divisions and the common dominators to them,
   and walks it twice.  The first walk is in post-order, and it annotates
   each block with the number of divisions that post-dominate it: this
   gives information on where divisions can be inserted profitably.
   The second walk is in pre-order, and it inserts divisions as explained
   above, and replaces divisions by multiplications.

   In the best case, the cost of the pass is O(n_statements).  In the
   worst case, the cost is due to creating the dominator tree subset,
   with a cost of O(n_basic_blocks ^ 2); however this can only happen
   for n_statements / n_basic_blocks statements.  So, the amortized cost
   of creating the dominator tree subset is O(n_basic_blocks) and the
   worst-case cost of the pass is O(n_statements * n_basic_blocks).

   More practically, the cost will be small because there are few
   divisions, and they tend to be in the same basic block, so insert_bb
   is called very few times.

   If we did this using domwalk.cc, an efficient implementation would have
   to work on all the variables in a single pass, because we could not
   work on just a subset of the dominator tree, as we do now, and the
   cost would also be something like O(n_statements * n_basic_blocks).
   The data structures would be more complex in order to work on all the
   variables in a single pass.  */
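
/* As a rough illustration of the N threshold above (a sketch, not an
   exhaustive description of the pass): with N == 2 and a diamond-shaped
   CFG

	bb1:  if (cond) goto bb2; else goto bb3;
	bb2:  t1 = a / d;
	bb3:  t2 = b / d;  t3 = c / d;

   only bb3 has at least two divisions by d post-dominating it, so the
   reciprocal recip = 1.0 / d is inserted there and its two divisions
   become multiplications, while the single division in bb2 is left
   untouched.  */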
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify.h"
#include "gimplify-me.h"
#include "stor-layout.h"
#include "tree-cfg.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "optabs-libfuncs.h"
#include "targhooks.h"
#include "tree-ssa-math-opts.h"
#include "cfghooks.h"
/* This structure represents one basic block that either computes a
   division, or is a common dominator for basic blocks that compute a
   division.  */
struct occurrence
{
  /* The basic block represented by this structure.  */
  basic_block bb = basic_block ();

  /* If non-NULL, the SSA_NAME holding the definition for a reciprocal
     inserted in BB.  */
  tree recip_def = tree ();

  /* If non-NULL, the SSA_NAME holding the definition for a squared
     reciprocal inserted in BB.  */
  tree square_recip_def = tree ();

  /* If non-NULL, the GIMPLE_ASSIGN for a reciprocal computation that
     was inserted in BB.  */
  gimple *recip_def_stmt = nullptr;

  /* Pointer to a list of "struct occurrence"s for blocks dominated
     by BB.  */
  struct occurrence *children = nullptr;

  /* Pointer to the next "struct occurrence"s in the list of blocks
     sharing a common dominator.  */
  struct occurrence *next = nullptr;

  /* The number of divisions that are in BB before compute_merit.  The
     number of divisions that are in BB or post-dominate it after
     compute_merit.  */
  int num_divisions = 0;

  /* True if the basic block has a division, false if it is a common
     dominator for basic blocks that do.  If it is false and trapping
     math is active, BB is not a candidate for inserting a reciprocal.  */
  bool bb_has_division = false;

  /* Construct a struct occurrence for basic block BB, and whose
     children list is headed by CHILDREN.  */
  occurrence (basic_block bb, struct occurrence *children)
    : bb (bb), children (children)
  {
    bb->aux = this;
  }

  /* Destroy a struct occurrence and remove it from its basic block.  */
  ~occurrence ()
  {
    bb->aux = NULL;
  }

  /* Allocate memory for a struct occurrence from OCC_POOL.  */
  static void* operator new (size_t);

  /* Return memory for a struct occurrence to OCC_POOL.  */
  static void operator delete (void*, size_t);
};
static struct
{
  /* Number of 1.0/X ops inserted.  */
  int rdivs_inserted;

  /* Number of 1.0/FUNC ops inserted.  */
  int rfuncs_inserted;
} reciprocal_stats;

static struct
{
  /* Number of cexpi calls inserted.  */
  int inserted;

  /* Number of conversions removed.  */
  int conv_removed;
} sincos_stats;

static struct
{
  /* Number of widening multiplication ops inserted.  */
  int widen_mults_inserted;

  /* Number of integer multiply-and-accumulate ops inserted.  */
  int maccs_inserted;

  /* Number of fp fused multiply-add ops inserted.  */
  int fmas_inserted;

  /* Number of divmod calls inserted.  */
  int divmod_calls_inserted;

  /* Number of highpart multiplication ops inserted.  */
  int highpart_mults_inserted;
} widen_mul_stats;

/* The instance of "struct occurrence" representing the highest
   interesting block in the dominator tree.  */
static struct occurrence *occ_head;

/* Allocation pool for getting instances of "struct occurrence".  */
static object_allocator<occurrence> *occ_pool;
void* occurrence::operator new (size_t n)
{
  gcc_assert (n == sizeof(occurrence));
  return occ_pool->allocate_raw ();
}

void occurrence::operator delete (void *occ, size_t n)
{
  gcc_assert (n == sizeof(occurrence));
  occ_pool->remove_raw (occ);
}
/* Insert NEW_OCC into our subset of the dominator tree.  P_HEAD points to a
   list of "struct occurrence"s, one per basic block, having IDOM as
   their common dominator.

   We try to insert NEW_OCC as deep as possible in the tree, and we also
   insert any other block that is a common dominator for BB and one
   block already in the tree.  */
245 insert_bb (struct occurrence
*new_occ
, basic_block idom
,
246 struct occurrence
**p_head
)
248 struct occurrence
*occ
, **p_occ
;
250 for (p_occ
= p_head
; (occ
= *p_occ
) != NULL
; )
252 basic_block bb
= new_occ
->bb
, occ_bb
= occ
->bb
;
253 basic_block dom
= nearest_common_dominator (CDI_DOMINATORS
, occ_bb
, bb
);
256 /* BB dominates OCC_BB. OCC becomes NEW_OCC's child: remove OCC
259 occ
->next
= new_occ
->children
;
260 new_occ
->children
= occ
;
262 /* Try the next block (it may as well be dominated by BB). */
265 else if (dom
== occ_bb
)
267 /* OCC_BB dominates BB. Tail recurse to look deeper. */
268 insert_bb (new_occ
, dom
, &occ
->children
);
272 else if (dom
!= idom
)
274 gcc_assert (!dom
->aux
);
276 /* There is a dominator between IDOM and BB, add it and make
277 two children out of NEW_OCC and OCC. First, remove OCC from
283 /* None of the previous blocks has DOM as a dominator: if we tail
284 recursed, we would reexamine them uselessly. Just switch BB with
285 DOM, and go on looking for blocks dominated by DOM. */
286 new_occ
= new occurrence (dom
, new_occ
);
291 /* Nothing special, go on with the next element. */
296 /* No place was found as a child of IDOM. Make BB a sibling of IDOM. */
297 new_occ
->next
= *p_head
;
/* Register that we found a division in BB.
   IMPORTANCE is a measure of how much weighting to give
   that division.  Use IMPORTANCE = 2 to register a single
   division.  If the division is going to be found multiple
   times use 1 (as it is with squares).  */
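
/* For instance, a plain division a / d is registered once with
   IMPORTANCE == 2, while a division b / (d * d) is encountered twice
   when walking the immediate uses of d (d appears twice in d * d) and
   is therefore registered with IMPORTANCE == 1 each time.  Either way
   a block's weight grows by 2 per division, matching the division by
   two applied when num_divisions is compared against the threshold.  */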
308 register_division_in (basic_block bb
, int importance
)
310 struct occurrence
*occ
;
312 occ
= (struct occurrence
*) bb
->aux
;
315 occ
= new occurrence (bb
, NULL
);
316 insert_bb (occ
, ENTRY_BLOCK_PTR_FOR_FN (cfun
), &occ_head
);
319 occ
->bb_has_division
= true;
320 occ
->num_divisions
+= importance
;
/* Compute the number of divisions that postdominate each block in OCC and
   its children.  */
328 compute_merit (struct occurrence
*occ
)
330 struct occurrence
*occ_child
;
331 basic_block dom
= occ
->bb
;
333 for (occ_child
= occ
->children
; occ_child
; occ_child
= occ_child
->next
)
336 if (occ_child
->children
)
337 compute_merit (occ_child
);
340 bb
= single_noncomplex_succ (dom
);
344 if (dominated_by_p (CDI_POST_DOMINATORS
, bb
, occ_child
->bb
))
345 occ
->num_divisions
+= occ_child
->num_divisions
;
350 /* Return whether USE_STMT is a floating-point division by DEF. */
352 is_division_by (gimple
*use_stmt
, tree def
)
354 return is_gimple_assign (use_stmt
)
355 && gimple_assign_rhs_code (use_stmt
) == RDIV_EXPR
356 && gimple_assign_rhs2 (use_stmt
) == def
357 /* Do not recognize x / x as valid division, as we are getting
358 confused later by replacing all immediate uses x in such
360 && gimple_assign_rhs1 (use_stmt
) != def
361 && !stmt_can_throw_internal (cfun
, use_stmt
);
364 /* Return TRUE if USE_STMT is a multiplication of DEF by A. */
366 is_mult_by (gimple
*use_stmt
, tree def
, tree a
)
368 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
369 && gimple_assign_rhs_code (use_stmt
) == MULT_EXPR
)
371 tree op0
= gimple_assign_rhs1 (use_stmt
);
372 tree op1
= gimple_assign_rhs2 (use_stmt
);
374 return (op0
== def
&& op1
== a
)
375 || (op0
== a
&& op1
== def
);
380 /* Return whether USE_STMT is DEF * DEF. */
382 is_square_of (gimple
*use_stmt
, tree def
)
384 return is_mult_by (use_stmt
, def
, def
);
387 /* Return whether USE_STMT is a floating-point division by
390 is_division_by_square (gimple
*use_stmt
, tree def
)
392 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
393 && gimple_assign_rhs_code (use_stmt
) == RDIV_EXPR
394 && gimple_assign_rhs1 (use_stmt
) != gimple_assign_rhs2 (use_stmt
)
395 && !stmt_can_throw_internal (cfun
, use_stmt
))
397 tree denominator
= gimple_assign_rhs2 (use_stmt
);
398 if (TREE_CODE (denominator
) == SSA_NAME
)
399 return is_square_of (SSA_NAME_DEF_STMT (denominator
), def
);
/* Walk the subset of the dominator tree rooted at OCC, setting the
   RECIP_DEF field to a definition of 1.0 / DEF that can be used in
   the given basic block.  The field may be left NULL, of course,
   if it is not possible or profitable to do the optimization.

   DEF_GSI is an iterator pointing at the statement defining DEF.
   If RECIP_DEF is set, a dominator already has a computation that can
   be used.

   If should_insert_square_recip is set, then this also inserts
   the square of the reciprocal immediately after the definition
   of the reciprocal.  */
418 insert_reciprocals (gimple_stmt_iterator
*def_gsi
, struct occurrence
*occ
,
419 tree def
, tree recip_def
, tree square_recip_def
,
420 int should_insert_square_recip
, int threshold
)
423 gassign
*new_stmt
, *new_square_stmt
;
424 gimple_stmt_iterator gsi
;
425 struct occurrence
*occ_child
;
428 && (occ
->bb_has_division
|| !flag_trapping_math
)
429 /* Divide by two as all divisions are counted twice in
431 && occ
->num_divisions
/ 2 >= threshold
)
433 /* Make a variable with the replacement and substitute it. */
434 type
= TREE_TYPE (def
);
435 recip_def
= create_tmp_reg (type
, "reciptmp");
436 new_stmt
= gimple_build_assign (recip_def
, RDIV_EXPR
,
437 build_one_cst (type
), def
);
439 if (should_insert_square_recip
)
441 square_recip_def
= create_tmp_reg (type
, "powmult_reciptmp");
442 new_square_stmt
= gimple_build_assign (square_recip_def
, MULT_EXPR
,
443 recip_def
, recip_def
);
446 if (occ
->bb_has_division
)
448 /* Case 1: insert before an existing division. */
449 gsi
= gsi_after_labels (occ
->bb
);
450 while (!gsi_end_p (gsi
)
451 && (!is_division_by (gsi_stmt (gsi
), def
))
452 && (!is_division_by_square (gsi_stmt (gsi
), def
)))
455 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
456 if (should_insert_square_recip
)
457 gsi_insert_before (&gsi
, new_square_stmt
, GSI_SAME_STMT
);
459 else if (def_gsi
&& occ
->bb
== gsi_bb (*def_gsi
))
461 /* Case 2: insert right after the definition. Note that this will
462 never happen if the definition statement can throw, because in
463 that case the sole successor of the statement's basic block will
464 dominate all the uses as well. */
465 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
466 if (should_insert_square_recip
)
467 gsi_insert_after (def_gsi
, new_square_stmt
, GSI_NEW_STMT
);
471 /* Case 3: insert in a basic block not containing defs/uses. */
472 gsi
= gsi_after_labels (occ
->bb
);
473 gsi_insert_before (&gsi
, new_stmt
, GSI_SAME_STMT
);
474 if (should_insert_square_recip
)
475 gsi_insert_before (&gsi
, new_square_stmt
, GSI_SAME_STMT
);
478 reciprocal_stats
.rdivs_inserted
++;
480 occ
->recip_def_stmt
= new_stmt
;
483 occ
->recip_def
= recip_def
;
484 occ
->square_recip_def
= square_recip_def
;
485 for (occ_child
= occ
->children
; occ_child
; occ_child
= occ_child
->next
)
486 insert_reciprocals (def_gsi
, occ_child
, def
, recip_def
,
487 square_recip_def
, should_insert_square_recip
,
491 /* Replace occurrences of expr / (x * x) with expr * ((1 / x) * (1 / x)).
492 Take as argument the use for (x * x). */
494 replace_reciprocal_squares (use_operand_p use_p
)
496 gimple
*use_stmt
= USE_STMT (use_p
);
497 basic_block bb
= gimple_bb (use_stmt
);
498 struct occurrence
*occ
= (struct occurrence
*) bb
->aux
;
500 if (optimize_bb_for_speed_p (bb
) && occ
->square_recip_def
503 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
504 gimple_assign_set_rhs_code (use_stmt
, MULT_EXPR
);
505 gimple_assign_set_rhs2 (use_stmt
, occ
->square_recip_def
);
506 SET_USE (use_p
, occ
->square_recip_def
);
507 fold_stmt_inplace (&gsi
);
508 update_stmt (use_stmt
);
/* Replace the division at USE_P with a multiplication by the reciprocal, if
   possible.  */
517 replace_reciprocal (use_operand_p use_p
)
519 gimple
*use_stmt
= USE_STMT (use_p
);
520 basic_block bb
= gimple_bb (use_stmt
);
521 struct occurrence
*occ
= (struct occurrence
*) bb
->aux
;
523 if (optimize_bb_for_speed_p (bb
)
524 && occ
->recip_def
&& use_stmt
!= occ
->recip_def_stmt
)
526 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
527 gimple_assign_set_rhs_code (use_stmt
, MULT_EXPR
);
528 SET_USE (use_p
, occ
->recip_def
);
529 fold_stmt_inplace (&gsi
);
530 update_stmt (use_stmt
);
535 /* Free OCC and return one more "struct occurrence" to be freed. */
537 static struct occurrence
*
538 free_bb (struct occurrence
*occ
)
540 struct occurrence
*child
, *next
;
542 /* First get the two pointers hanging off OCC. */
544 child
= occ
->children
;
547 /* Now ensure that we don't recurse unless it is necessary. */
553 next
= free_bb (next
);
/* Transform sequences like
   t = sqrt (a)
   x = 1.0 / t;
   r1 = x * x;
   r2 = a * x;
   into:
   t = sqrt (a)
   r1 = 1.0 / a;
   r2 = sqrt (a);
   x = r1 * r2;
   depending on the uses of x, r1, r2.  This removes one multiplication and
   allows the sqrt and division operations to execute in parallel.
   DEF_GSI is the gsi of the initial division by sqrt that defines
   DEF (x in the example above).  */
575 optimize_recip_sqrt (gimple_stmt_iterator
*def_gsi
, tree def
)
578 imm_use_iterator use_iter
;
579 gimple
*stmt
= gsi_stmt (*def_gsi
);
581 tree orig_sqrt_ssa_name
= gimple_assign_rhs2 (stmt
);
582 tree div_rhs1
= gimple_assign_rhs1 (stmt
);
584 if (TREE_CODE (orig_sqrt_ssa_name
) != SSA_NAME
585 || TREE_CODE (div_rhs1
) != REAL_CST
586 || !real_equal (&TREE_REAL_CST (div_rhs1
), &dconst1
))
590 = dyn_cast
<gcall
*> (SSA_NAME_DEF_STMT (orig_sqrt_ssa_name
));
592 if (!sqrt_stmt
|| !gimple_call_lhs (sqrt_stmt
))
595 switch (gimple_call_combined_fn (sqrt_stmt
))
604 tree a
= gimple_call_arg (sqrt_stmt
, 0);
606 /* We have 'a' and 'x'. Now analyze the uses of 'x'. */
608 /* Statements that use x in x * x. */
609 auto_vec
<gimple
*> sqr_stmts
;
610 /* Statements that use x in a * x. */
611 auto_vec
<gimple
*> mult_stmts
;
612 bool has_other_use
= false;
613 bool mult_on_main_path
= false;
615 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, x
)
617 if (is_gimple_debug (use_stmt
))
619 if (is_square_of (use_stmt
, x
))
621 sqr_stmts
.safe_push (use_stmt
);
622 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
623 mult_on_main_path
= true;
625 else if (is_mult_by (use_stmt
, x
, a
))
627 mult_stmts
.safe_push (use_stmt
);
628 if (gimple_bb (use_stmt
) == gimple_bb (stmt
))
629 mult_on_main_path
= true;
632 has_other_use
= true;
635 /* In the x * x and a * x cases we just rewire stmt operands or
636 remove multiplications. In the has_other_use case we introduce
637 a multiplication so make sure we don't introduce a multiplication
638 on a path where there was none. */
639 if (has_other_use
&& !mult_on_main_path
)
642 if (sqr_stmts
.is_empty () && mult_stmts
.is_empty ())
645 /* If x = 1.0 / sqrt (a) has uses other than those optimized here we want
646 to be able to compose it from the sqr and mult cases. */
647 if (has_other_use
&& (sqr_stmts
.is_empty () || mult_stmts
.is_empty ()))
652 fprintf (dump_file
, "Optimizing reciprocal sqrt multiplications of\n");
653 print_gimple_stmt (dump_file
, sqrt_stmt
, 0, TDF_NONE
);
654 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
655 fprintf (dump_file
, "\n");
658 bool delete_div
= !has_other_use
;
659 tree sqr_ssa_name
= NULL_TREE
;
660 if (!sqr_stmts
.is_empty ())
662 /* r1 = x * x. Transform the original
669 = make_temp_ssa_name (TREE_TYPE (a
), NULL
, "recip_sqrt_sqr");
673 fprintf (dump_file
, "Replacing original division\n");
674 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
675 fprintf (dump_file
, "with new division\n");
678 = gimple_build_assign (sqr_ssa_name
, gimple_assign_rhs_code (stmt
),
679 gimple_assign_rhs1 (stmt
), a
);
680 gsi_insert_before (def_gsi
, stmt
, GSI_SAME_STMT
);
681 gsi_remove (def_gsi
, true);
682 *def_gsi
= gsi_for_stmt (stmt
);
683 fold_stmt_inplace (def_gsi
);
687 print_gimple_stmt (dump_file
, stmt
, 0, TDF_NONE
);
692 FOR_EACH_VEC_ELT (sqr_stmts
, i
, sqr_stmt
)
694 gimple_stmt_iterator gsi2
= gsi_for_stmt (sqr_stmt
);
695 gimple_assign_set_rhs_from_tree (&gsi2
, sqr_ssa_name
);
696 update_stmt (sqr_stmt
);
699 if (!mult_stmts
.is_empty ())
701 /* r2 = a * x. Transform this into:
702 r2 = t (The original sqrt (a)). */
704 gimple
*mult_stmt
= NULL
;
705 FOR_EACH_VEC_ELT (mult_stmts
, i
, mult_stmt
)
707 gimple_stmt_iterator gsi2
= gsi_for_stmt (mult_stmt
);
711 fprintf (dump_file
, "Replacing squaring multiplication\n");
712 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
713 fprintf (dump_file
, "with assignment\n");
715 gimple_assign_set_rhs_from_tree (&gsi2
, orig_sqrt_ssa_name
);
716 fold_stmt_inplace (&gsi2
);
717 update_stmt (mult_stmt
);
719 print_gimple_stmt (dump_file
, mult_stmt
, 0, TDF_NONE
);
725 /* Using the two temporaries tmp1, tmp2 from above
726 the original x is now:
728 gcc_assert (orig_sqrt_ssa_name
);
729 gcc_assert (sqr_ssa_name
);
732 = gimple_build_assign (x
, MULT_EXPR
,
733 orig_sqrt_ssa_name
, sqr_ssa_name
);
734 gsi_insert_after (def_gsi
, new_stmt
, GSI_NEW_STMT
);
739 /* Remove the original division. */
740 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
741 gsi_remove (&gsi2
, true);
745 release_ssa_name (x
);
748 /* Look for floating-point divisions among DEF's uses, and try to
749 replace them by multiplications with the reciprocal. Add
750 as many statements computing the reciprocal as needed.
752 DEF must be a GIMPLE register of a floating-point type. */
755 execute_cse_reciprocals_1 (gimple_stmt_iterator
*def_gsi
, tree def
)
757 use_operand_p use_p
, square_use_p
;
758 imm_use_iterator use_iter
, square_use_iter
;
760 struct occurrence
*occ
;
763 int square_recip_count
= 0;
764 int sqrt_recip_count
= 0;
766 gcc_assert (FLOAT_TYPE_P (TREE_TYPE (def
)) && TREE_CODE (def
) == SSA_NAME
);
767 threshold
= targetm
.min_divisions_for_recip_mul (TYPE_MODE (TREE_TYPE (def
)));
769 /* If DEF is a square (x * x), count the number of divisions by x.
770 If there are more divisions by x than by (DEF * DEF), prefer to optimize
771 the reciprocal of x instead of DEF. This improves cases like:
776 Reciprocal optimization of x results in 1 division rather than 2 or 3. */
777 gimple
*def_stmt
= SSA_NAME_DEF_STMT (def
);
779 if (is_gimple_assign (def_stmt
)
780 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
781 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == SSA_NAME
782 && gimple_assign_rhs1 (def_stmt
) == gimple_assign_rhs2 (def_stmt
))
784 tree op0
= gimple_assign_rhs1 (def_stmt
);
786 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, op0
)
788 gimple
*use_stmt
= USE_STMT (use_p
);
789 if (is_division_by (use_stmt
, op0
))
794 FOR_EACH_IMM_USE_FAST (use_p
, use_iter
, def
)
796 gimple
*use_stmt
= USE_STMT (use_p
);
797 if (is_division_by (use_stmt
, def
))
799 register_division_in (gimple_bb (use_stmt
), 2);
803 if (is_square_of (use_stmt
, def
))
805 square_def
= gimple_assign_lhs (use_stmt
);
806 FOR_EACH_IMM_USE_FAST (square_use_p
, square_use_iter
, square_def
)
808 gimple
*square_use_stmt
= USE_STMT (square_use_p
);
809 if (is_division_by (square_use_stmt
, square_def
))
811 /* This is executed twice for each division by a square. */
812 register_division_in (gimple_bb (square_use_stmt
), 1);
813 square_recip_count
++;
819 /* Square reciprocals were counted twice above. */
820 square_recip_count
/= 2;
822 /* If it is more profitable to optimize 1 / x, don't optimize 1 / (x * x). */
823 if (sqrt_recip_count
> square_recip_count
)
826 /* Do the expensive part only if we can hope to optimize something. */
827 if (count
+ square_recip_count
>= threshold
&& count
>= 1)
830 for (occ
= occ_head
; occ
; occ
= occ
->next
)
833 insert_reciprocals (def_gsi
, occ
, def
, NULL
, NULL
,
834 square_recip_count
, threshold
);
837 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, def
)
839 if (is_division_by (use_stmt
, def
))
841 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
842 replace_reciprocal (use_p
);
844 else if (square_recip_count
> 0 && is_square_of (use_stmt
, def
))
846 FOR_EACH_IMM_USE_ON_STMT (use_p
, use_iter
)
848 /* Find all uses of the square that are divisions and
849 * replace them by multiplications with the inverse. */
850 imm_use_iterator square_iterator
;
851 gimple
*powmult_use_stmt
= USE_STMT (use_p
);
852 tree powmult_def_name
= gimple_assign_lhs (powmult_use_stmt
);
854 FOR_EACH_IMM_USE_STMT (powmult_use_stmt
,
855 square_iterator
, powmult_def_name
)
856 FOR_EACH_IMM_USE_ON_STMT (square_use_p
, square_iterator
)
858 gimple
*powmult_use_stmt
= USE_STMT (square_use_p
);
859 if (is_division_by (powmult_use_stmt
, powmult_def_name
))
860 replace_reciprocal_squares (square_use_p
);
868 for (occ
= occ_head
; occ
; )
874 /* Return an internal function that implements the reciprocal of CALL,
875 or IFN_LAST if there is no such function that the target supports. */
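
/* For example, for a sqrt call this is typically the reciprocal square
   root IFN_RSQRT, provided the target supports it for the call's types;
   otherwise IFN_LAST is returned and the caller falls back to
   targetm.builtin_reciprocal.  */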
878 internal_fn_reciprocal (gcall
*call
)
882 switch (gimple_call_combined_fn (call
))
893 tree_pair types
= direct_internal_fn_types (ifn
, call
);
894 if (!direct_internal_fn_supported_p (ifn
, types
, OPTIMIZE_FOR_SPEED
))
900 /* Go through all the floating-point SSA_NAMEs, and call
901 execute_cse_reciprocals_1 on each of them. */
904 const pass_data pass_data_cse_reciprocals
=
906 GIMPLE_PASS
, /* type */
908 OPTGROUP_NONE
, /* optinfo_flags */
909 TV_TREE_RECIP
, /* tv_id */
910 PROP_ssa
, /* properties_required */
911 0, /* properties_provided */
912 0, /* properties_destroyed */
913 0, /* todo_flags_start */
914 TODO_update_ssa
, /* todo_flags_finish */
917 class pass_cse_reciprocals
: public gimple_opt_pass
920 pass_cse_reciprocals (gcc::context
*ctxt
)
921 : gimple_opt_pass (pass_data_cse_reciprocals
, ctxt
)
924 /* opt_pass methods: */
925 bool gate (function
*) final override
927 return optimize
&& flag_reciprocal_math
;
929 unsigned int execute (function
*) final override
;
931 }; // class pass_cse_reciprocals
934 pass_cse_reciprocals::execute (function
*fun
)
939 occ_pool
= new object_allocator
<occurrence
> ("dominators for recip");
941 memset (&reciprocal_stats
, 0, sizeof (reciprocal_stats
));
942 calculate_dominance_info (CDI_DOMINATORS
);
943 calculate_dominance_info (CDI_POST_DOMINATORS
);
946 FOR_EACH_BB_FN (bb
, fun
)
947 gcc_assert (!bb
->aux
);
949 for (arg
= DECL_ARGUMENTS (fun
->decl
); arg
; arg
= DECL_CHAIN (arg
))
950 if (FLOAT_TYPE_P (TREE_TYPE (arg
))
951 && is_gimple_reg (arg
))
953 tree name
= ssa_default_def (fun
, arg
);
955 execute_cse_reciprocals_1 (NULL
, name
);
958 FOR_EACH_BB_FN (bb
, fun
)
962 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
965 gphi
*phi
= gsi
.phi ();
966 def
= PHI_RESULT (phi
);
967 if (! virtual_operand_p (def
)
968 && FLOAT_TYPE_P (TREE_TYPE (def
)))
969 execute_cse_reciprocals_1 (NULL
, def
);
972 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
975 gimple
*stmt
= gsi_stmt (gsi
);
977 if (gimple_has_lhs (stmt
)
978 && (def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
)) != NULL
979 && FLOAT_TYPE_P (TREE_TYPE (def
))
980 && TREE_CODE (def
) == SSA_NAME
)
982 execute_cse_reciprocals_1 (&gsi
, def
);
983 stmt
= gsi_stmt (gsi
);
984 if (flag_unsafe_math_optimizations
985 && is_gimple_assign (stmt
)
986 && gimple_assign_lhs (stmt
) == def
987 && !stmt_can_throw_internal (cfun
, stmt
)
988 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
989 optimize_recip_sqrt (&gsi
, def
);
993 if (optimize_bb_for_size_p (bb
))
996 /* Scan for a/func(b) and convert it to reciprocal a*rfunc(b). */
997 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
1000 gimple
*stmt
= gsi_stmt (gsi
);
1002 if (is_gimple_assign (stmt
)
1003 && gimple_assign_rhs_code (stmt
) == RDIV_EXPR
)
1005 tree arg1
= gimple_assign_rhs2 (stmt
);
1008 if (TREE_CODE (arg1
) != SSA_NAME
)
1011 stmt1
= SSA_NAME_DEF_STMT (arg1
);
1013 if (is_gimple_call (stmt1
)
1014 && gimple_call_lhs (stmt1
))
1017 imm_use_iterator ui
;
1018 use_operand_p use_p
;
1019 tree fndecl
= NULL_TREE
;
1021 gcall
*call
= as_a
<gcall
*> (stmt1
);
1022 internal_fn ifn
= internal_fn_reciprocal (call
);
1023 if (ifn
== IFN_LAST
)
1025 fndecl
= gimple_call_fndecl (call
);
1027 || !fndecl_built_in_p (fndecl
, BUILT_IN_MD
))
1029 fndecl
= targetm
.builtin_reciprocal (fndecl
);
/* Check that all uses of the SSA name are divisions,
   otherwise replacing the defining statement will do
   the wrong thing.  */
1038 FOR_EACH_IMM_USE_FAST (use_p
, ui
, arg1
)
1040 gimple
*stmt2
= USE_STMT (use_p
);
1041 if (is_gimple_debug (stmt2
))
1043 if (!is_gimple_assign (stmt2
)
1044 || gimple_assign_rhs_code (stmt2
) != RDIV_EXPR
1045 || gimple_assign_rhs1 (stmt2
) == arg1
1046 || gimple_assign_rhs2 (stmt2
) != arg1
)
1055 gimple_replace_ssa_lhs (call
, arg1
);
1056 if (gimple_call_internal_p (call
) != (ifn
!= IFN_LAST
))
1058 auto_vec
<tree
, 4> args
;
1059 for (unsigned int i
= 0;
1060 i
< gimple_call_num_args (call
); i
++)
1061 args
.safe_push (gimple_call_arg (call
, i
));
1063 if (ifn
== IFN_LAST
)
1064 stmt2
= gimple_build_call_vec (fndecl
, args
);
1066 stmt2
= gimple_build_call_internal_vec (ifn
, args
);
1067 gimple_call_set_lhs (stmt2
, arg1
);
1068 gimple_move_vops (stmt2
, call
);
1069 gimple_call_set_nothrow (stmt2
,
1070 gimple_call_nothrow_p (call
));
1071 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
1072 gsi_replace (&gsi2
, stmt2
, true);
1076 if (ifn
== IFN_LAST
)
1077 gimple_call_set_fndecl (call
, fndecl
);
1079 gimple_call_set_internal_fn (call
, ifn
);
1082 reciprocal_stats
.rfuncs_inserted
++;
1084 FOR_EACH_IMM_USE_STMT (stmt
, ui
, arg1
)
1086 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1087 gimple_assign_set_rhs_code (stmt
, MULT_EXPR
);
1088 fold_stmt_inplace (&gsi
);
1096 statistics_counter_event (fun
, "reciprocal divs inserted",
1097 reciprocal_stats
.rdivs_inserted
);
1098 statistics_counter_event (fun
, "reciprocal functions inserted",
1099 reciprocal_stats
.rfuncs_inserted
);
1101 free_dominance_info (CDI_DOMINATORS
);
1102 free_dominance_info (CDI_POST_DOMINATORS
);
1110 make_pass_cse_reciprocals (gcc::context
*ctxt
)
1112 return new pass_cse_reciprocals (ctxt
);
1115 /* If NAME is the result of a type conversion, look for other
1116 equivalent dominating or dominated conversions, and replace all
1117 uses with the earliest dominating name, removing the redundant
1118 conversions. Return the prevailing name. */
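
/* A minimal sketch of the transformation: given

     t1_5 = (double) s_3;
     ...
     t2_8 = (double) s_3;

   where the first conversion dominates the second, every use of t2_8 is
   rewritten to use t1_5 and the second conversion is removed.  */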
1121 execute_cse_conv_1 (tree name
, bool *cfg_changed
)
1123 if (SSA_NAME_IS_DEFAULT_DEF (name
)
1124 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name
))
1127 gimple
*def_stmt
= SSA_NAME_DEF_STMT (name
);
1129 if (!gimple_assign_cast_p (def_stmt
))
1132 tree src
= gimple_assign_rhs1 (def_stmt
);
1134 if (TREE_CODE (src
) != SSA_NAME
)
1137 imm_use_iterator use_iter
;
1140 /* Find the earliest dominating def. */
1141 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1143 if (use_stmt
== def_stmt
1144 || !gimple_assign_cast_p (use_stmt
))
1147 tree lhs
= gimple_assign_lhs (use_stmt
);
1149 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1150 || (gimple_assign_rhs1 (use_stmt
)
1151 != gimple_assign_rhs1 (def_stmt
))
1152 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1156 if (gimple_bb (def_stmt
) == gimple_bb (use_stmt
))
1158 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1159 while (!gsi_end_p (gsi
) && gsi_stmt (gsi
) != def_stmt
)
1161 use_dominates
= !gsi_end_p (gsi
);
1163 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
),
1164 gimple_bb (def_stmt
)))
1165 use_dominates
= false;
1166 else if (dominated_by_p (CDI_DOMINATORS
, gimple_bb (def_stmt
),
1167 gimple_bb (use_stmt
)))
1168 use_dominates
= true;
1174 std::swap (name
, lhs
);
1175 std::swap (def_stmt
, use_stmt
);
/* Now go through all uses of SRC again, replacing the equivalent
   dominated conversions.  We may replace defs that were not
   dominated by the then-prevailing defs when we first visited
   them.  */
1183 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, src
)
1185 if (use_stmt
== def_stmt
1186 || !gimple_assign_cast_p (use_stmt
))
1189 tree lhs
= gimple_assign_lhs (use_stmt
);
1191 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs
)
1192 || (gimple_assign_rhs1 (use_stmt
)
1193 != gimple_assign_rhs1 (def_stmt
))
1194 || !types_compatible_p (TREE_TYPE (name
), TREE_TYPE (lhs
)))
1197 basic_block use_bb
= gimple_bb (use_stmt
);
1198 if (gimple_bb (def_stmt
) == use_bb
1199 || dominated_by_p (CDI_DOMINATORS
, use_bb
, gimple_bb (def_stmt
)))
1201 sincos_stats
.conv_removed
++;
1203 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
1204 replace_uses_by (lhs
, name
);
1205 if (gsi_remove (&gsi
, true)
1206 && gimple_purge_dead_eh_edges (use_bb
))
1207 *cfg_changed
= true;
1208 release_defs (use_stmt
);
1215 /* Records an occurrence at statement USE_STMT in the vector of trees
1216 STMTS if it is dominated by *TOP_BB or dominates it or this basic block
1217 is not yet initialized. Returns true if the occurrence was pushed on
1218 the vector. Adjusts *TOP_BB to be the basic block dominating all
1219 statements in the vector. */
1222 maybe_record_sincos (vec
<gimple
*> *stmts
,
1223 basic_block
*top_bb
, gimple
*use_stmt
)
1225 basic_block use_bb
= gimple_bb (use_stmt
);
1227 && (*top_bb
== use_bb
1228 || dominated_by_p (CDI_DOMINATORS
, use_bb
, *top_bb
)))
1229 stmts
->safe_push (use_stmt
);
1231 || dominated_by_p (CDI_DOMINATORS
, *top_bb
, use_bb
))
1233 stmts
->safe_push (use_stmt
);
1242 /* Look for sin, cos and cexpi calls with the same argument NAME and
1243 create a single call to cexpi CSEing the result in this case.
1244 We first walk over all immediate uses of the argument collecting
1245 statements that we can CSE in a vector and in a second pass replace
1246 the statement rhs with a REALPART or IMAGPART expression on the
1247 result of the cexpi call we insert before the use statement that
1248 dominates all other candidates. */
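
/* A minimal sketch of the result: for

     s_2 = sin (a_1);
     c_3 = cos (a_1);

   a single call tmp = cexpi (a_1) is inserted at the dominating site and
   the two calls are replaced with

     s_2 = IMAGPART_EXPR <tmp>;
     c_3 = REALPART_EXPR <tmp>;  */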
1251 execute_cse_sincos_1 (tree name
)
1253 gimple_stmt_iterator gsi
;
1254 imm_use_iterator use_iter
;
1255 tree fndecl
, res
, type
= NULL_TREE
;
1256 gimple
*def_stmt
, *use_stmt
, *stmt
;
1257 int seen_cos
= 0, seen_sin
= 0, seen_cexpi
= 0;
1258 auto_vec
<gimple
*> stmts
;
1259 basic_block top_bb
= NULL
;
1261 bool cfg_changed
= false;
1263 name
= execute_cse_conv_1 (name
, &cfg_changed
);
1265 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, name
)
1267 if (gimple_code (use_stmt
) != GIMPLE_CALL
1268 || !gimple_call_lhs (use_stmt
))
1271 switch (gimple_call_combined_fn (use_stmt
))
1274 seen_cos
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1278 seen_sin
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1282 seen_cexpi
|= maybe_record_sincos (&stmts
, &top_bb
, use_stmt
) ? 1 : 0;
1289 tree t
= mathfn_built_in_type (gimple_call_combined_fn (use_stmt
));
1293 t
= TREE_TYPE (name
);
1295 /* This checks that NAME has the right type in the first round,
1296 and, in subsequent rounds, that the built_in type is the same
1297 type, or a compatible type. */
1298 if (type
!= t
&& !types_compatible_p (type
, t
))
1301 if (seen_cos
+ seen_sin
+ seen_cexpi
<= 1)
1304 /* Simply insert cexpi at the beginning of top_bb but not earlier than
1305 the name def statement. */
1306 fndecl
= mathfn_built_in (type
, BUILT_IN_CEXPI
);
1309 stmt
= gimple_build_call (fndecl
, 1, name
);
1310 res
= make_temp_ssa_name (TREE_TYPE (TREE_TYPE (fndecl
)), stmt
, "sincostmp");
1311 gimple_call_set_lhs (stmt
, res
);
1313 def_stmt
= SSA_NAME_DEF_STMT (name
);
1314 if (!SSA_NAME_IS_DEFAULT_DEF (name
)
1315 && gimple_code (def_stmt
) != GIMPLE_PHI
1316 && gimple_bb (def_stmt
) == top_bb
)
1318 gsi
= gsi_for_stmt (def_stmt
);
1319 gsi_insert_after (&gsi
, stmt
, GSI_SAME_STMT
);
1323 gsi
= gsi_after_labels (top_bb
);
1324 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1326 sincos_stats
.inserted
++;
1328 /* And adjust the recorded old call sites. */
1329 for (i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
1333 switch (gimple_call_combined_fn (use_stmt
))
1336 rhs
= fold_build1 (REALPART_EXPR
, type
, res
);
1340 rhs
= fold_build1 (IMAGPART_EXPR
, type
, res
);
1351 /* Replace call with a copy. */
1352 stmt
= gimple_build_assign (gimple_call_lhs (use_stmt
), rhs
);
1354 gsi
= gsi_for_stmt (use_stmt
);
1355 gsi_replace (&gsi
, stmt
, true);
1356 if (gimple_purge_dead_eh_edges (gimple_bb (stmt
)))
1363 /* To evaluate powi(x,n), the floating point value x raised to the
1364 constant integer exponent n, we use a hybrid algorithm that
1365 combines the "window method" with look-up tables. For an
1366 introduction to exponentiation algorithms and "addition chains",
1367 see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
1368 "Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
1369 3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
1370 Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
1372 /* Provide a default value for POWI_MAX_MULTS, the maximum number of
1373 multiplications to inline before calling the system library's pow
1374 function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
1375 so this default never requires calling pow, powf or powl. */
1377 #ifndef POWI_MAX_MULTS
1378 #define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
1381 /* The size of the "optimal power tree" lookup table. All
1382 exponents less than this value are simply looked up in the
1383 powi_table below. This threshold is also used to size the
1384 cache of pseudo registers that hold intermediate results. */
1385 #define POWI_TABLE_SIZE 256
1387 /* The size, in bits of the window, used in the "window method"
1388 exponentiation algorithm. This is equivalent to a radix of
1389 (1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
1390 #define POWI_WINDOW_SIZE 3
/* The following table is an efficient representation of an
   "optimal power tree".  For each value, i, the corresponding
   value, j, in the table states that an optimal evaluation
   sequence for calculating pow(x,i) can be found by evaluating
   pow(x,j)*pow(x,i-j).  An optimal power tree for the first
   100 integers is given in Knuth's "Seminumerical Algorithms".  */
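
/* A worked example of how the table below is used: powi_table[13] == 10,
   powi_table[10] == 5, powi_table[5] == 3, powi_table[3] == 2 and
   powi_table[2] == 1, so pow (x, 13) is built as

     x2 = x * x;  x3 = x2 * x;  x5 = x3 * x2;  x10 = x5 * x5;
     x13 = x10 * x3;

   i.e. five multiplications, with previously computed powers reused via
   the cache in powi_as_mults_1.  */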
1399 static const unsigned char powi_table
[POWI_TABLE_SIZE
] =
1401 0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
1402 4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
1403 8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
1404 12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
1405 16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
1406 20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
1407 24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
1408 28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
1409 32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
1410 36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
1411 40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
1412 44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
1413 48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
1414 52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
1415 56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
1416 60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
1417 64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
1418 68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
1419 72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
1420 76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
1421 80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
1422 84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
1423 88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
1424 92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
1425 96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
1426 100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
1427 104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
1428 108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
1429 112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
1430 116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
1431 120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
1432 124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
1436 /* Return the number of multiplications required to calculate
1437 powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
1438 subroutine of powi_cost. CACHE is an array indicating
1439 which exponents have already been calculated. */
1442 powi_lookup_cost (unsigned HOST_WIDE_INT n
, bool *cache
)
1444 /* If we've already calculated this exponent, then this evaluation
1445 doesn't require any additional multiplications. */
1450 return powi_lookup_cost (n
- powi_table
[n
], cache
)
1451 + powi_lookup_cost (powi_table
[n
], cache
) + 1;
1454 /* Return the number of multiplications required to calculate
1455 powi(x,n) for an arbitrary x, given the exponent N. This
1456 function needs to be kept in sync with powi_as_mults below. */
1459 powi_cost (HOST_WIDE_INT n
)
1461 bool cache
[POWI_TABLE_SIZE
];
1462 unsigned HOST_WIDE_INT digit
;
1463 unsigned HOST_WIDE_INT val
;
1469 /* Ignore the reciprocal when calculating the cost. */
1472 /* Initialize the exponent cache. */
1473 memset (cache
, 0, POWI_TABLE_SIZE
* sizeof (bool));
1478 while (val
>= POWI_TABLE_SIZE
)
1482 digit
= val
& ((1 << POWI_WINDOW_SIZE
) - 1);
1483 result
+= powi_lookup_cost (digit
, cache
)
1484 + POWI_WINDOW_SIZE
+ 1;
1485 val
>>= POWI_WINDOW_SIZE
;
1494 return result
+ powi_lookup_cost (val
, cache
);
1497 /* Recursive subroutine of powi_as_mults. This function takes the
1498 array, CACHE, of already calculated exponents and an exponent N and
1499 returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
1502 powi_as_mults_1 (gimple_stmt_iterator
*gsi
, location_t loc
, tree type
,
1503 unsigned HOST_WIDE_INT n
, tree
*cache
)
1505 tree op0
, op1
, ssa_target
;
1506 unsigned HOST_WIDE_INT digit
;
1509 if (n
< POWI_TABLE_SIZE
&& cache
[n
])
1512 ssa_target
= make_temp_ssa_name (type
, NULL
, "powmult");
1514 if (n
< POWI_TABLE_SIZE
)
1516 cache
[n
] = ssa_target
;
1517 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- powi_table
[n
], cache
);
1518 op1
= powi_as_mults_1 (gsi
, loc
, type
, powi_table
[n
], cache
);
1522 digit
= n
& ((1 << POWI_WINDOW_SIZE
) - 1);
1523 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
- digit
, cache
);
1524 op1
= powi_as_mults_1 (gsi
, loc
, type
, digit
, cache
);
1528 op0
= powi_as_mults_1 (gsi
, loc
, type
, n
>> 1, cache
);
1532 mult_stmt
= gimple_build_assign (ssa_target
, MULT_EXPR
, op0
, op1
);
1533 gimple_set_location (mult_stmt
, loc
);
1534 gsi_insert_before (gsi
, mult_stmt
, GSI_SAME_STMT
);
1539 /* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
1540 This function needs to be kept in sync with powi_cost above. */
1543 powi_as_mults (gimple_stmt_iterator
*gsi
, location_t loc
,
1544 tree arg0
, HOST_WIDE_INT n
)
1546 tree cache
[POWI_TABLE_SIZE
], result
, type
= TREE_TYPE (arg0
);
1551 return build_one_cst (type
);
1553 memset (cache
, 0, sizeof (cache
));
1556 result
= powi_as_mults_1 (gsi
, loc
, type
, absu_hwi (n
), cache
);
1560 /* If the original exponent was negative, reciprocate the result. */
1561 target
= make_temp_ssa_name (type
, NULL
, "powmult");
1562 div_stmt
= gimple_build_assign (target
, RDIV_EXPR
,
1563 build_real (type
, dconst1
), result
);
1564 gimple_set_location (div_stmt
, loc
);
1565 gsi_insert_before (gsi
, div_stmt
, GSI_SAME_STMT
);
/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  */
1577 gimple_expand_builtin_powi (gimple_stmt_iterator
*gsi
, location_t loc
,
1578 tree arg0
, HOST_WIDE_INT n
)
1580 if ((n
>= -1 && n
<= 2)
1581 || (optimize_function_for_speed_p (cfun
)
1582 && powi_cost (n
) <= POWI_MAX_MULTS
))
1583 return powi_as_mults (gsi
, loc
, arg0
, n
);
/* Build a gimple call statement that calls FN with argument ARG.
   Set the lhs of the call statement to a fresh SSA name.  Insert the
   statement prior to GSI's current position, and return the fresh
   SSA name.  */
1594 build_and_insert_call (gimple_stmt_iterator
*gsi
, location_t loc
,
1600 call_stmt
= gimple_build_call (fn
, 1, arg
);
1601 ssa_target
= make_temp_ssa_name (TREE_TYPE (arg
), NULL
, "powroot");
1602 gimple_set_lhs (call_stmt
, ssa_target
);
1603 gimple_set_location (call_stmt
, loc
);
1604 gsi_insert_before (gsi
, call_stmt
, GSI_SAME_STMT
);
1609 /* Build a gimple binary operation with the given CODE and arguments
1610 ARG0, ARG1, assigning the result to a new SSA name for variable
1611 TARGET. Insert the statement prior to GSI's current position, and
1612 return the fresh SSA name.*/
1615 build_and_insert_binop (gimple_stmt_iterator
*gsi
, location_t loc
,
1616 const char *name
, enum tree_code code
,
1617 tree arg0
, tree arg1
)
1619 tree result
= make_temp_ssa_name (TREE_TYPE (arg0
), NULL
, name
);
1620 gassign
*stmt
= gimple_build_assign (result
, code
, arg0
, arg1
);
1621 gimple_set_location (stmt
, loc
);
1622 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1626 /* Build a gimple assignment to cast VAL to TYPE. Insert the statement
1627 prior to GSI's current position, and return the fresh SSA name. */
1630 build_and_insert_cast (gimple_stmt_iterator
*gsi
, location_t loc
,
1631 tree type
, tree val
)
1633 tree result
= make_ssa_name (type
);
1634 gassign
*stmt
= gimple_build_assign (result
, NOP_EXPR
, val
);
1635 gimple_set_location (stmt
, loc
);
1636 gsi_insert_before (gsi
, stmt
, GSI_SAME_STMT
);
1640 struct pow_synth_sqrt_info
1643 unsigned int deepest
;
1644 unsigned int num_mults
;
/* Return true iff the real value C can be represented as a
   sum of powers of 0.5 up to N.  That is:
   C == SUM<i from 1..N> (a[i]*(0.5**i)) where a[i] is either 0 or 1.
   Record in INFO the various parameters of the synthesis algorithm such
   as the factors a[i], the maximum 0.5 power and the number of
   multiplications that will be required.  */
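
/* For example, c == 0.625 == 0.5**1 + 0.5**3 is representable with
   N >= 3: factors == {1, 0, 1}, deepest == 3 and num_mults == 1, so
   pow (x, 0.625) can later be synthesized as
   sqrt (x) * sqrt (sqrt (sqrt (x))).  */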
1655 representable_as_half_series_p (REAL_VALUE_TYPE c
, unsigned n
,
1656 struct pow_synth_sqrt_info
*info
)
1658 REAL_VALUE_TYPE factor
= dconsthalf
;
1659 REAL_VALUE_TYPE remainder
= c
;
1662 info
->num_mults
= 0;
1663 memset (info
->factors
, 0, n
* sizeof (bool));
1665 for (unsigned i
= 0; i
< n
; i
++)
1667 REAL_VALUE_TYPE res
;
1669 /* If something inexact happened bail out now. */
1670 if (real_arithmetic (&res
, MINUS_EXPR
, &remainder
, &factor
))
1673 /* We have hit zero. The number is representable as a sum
1674 of powers of 0.5. */
1675 if (real_equal (&res
, &dconst0
))
1677 info
->factors
[i
] = true;
1678 info
->deepest
= i
+ 1;
1681 else if (!REAL_VALUE_NEGATIVE (res
))
1684 info
->factors
[i
] = true;
1688 info
->factors
[i
] = false;
1690 real_arithmetic (&factor
, MULT_EXPR
, &factor
, &dconsthalf
);
1695 /* Return the tree corresponding to FN being applied
1696 to ARG N times at GSI and LOC.
1697 Look up previous results from CACHE if need be.
1698 cache[0] should contain just plain ARG i.e. FN applied to ARG 0 times. */
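
/* For instance, with FN == sqrt, get_fn_chain (x, 3, ...) returns an SSA
   name holding sqrt (sqrt (sqrt (x))), inserting only the calls whose
   results are not already in CACHE, so shallower chains built earlier in
   the expansion are reused.  */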
1701 get_fn_chain (tree arg
, unsigned int n
, gimple_stmt_iterator
*gsi
,
1702 tree fn
, location_t loc
, tree
*cache
)
1704 tree res
= cache
[n
];
1707 tree prev
= get_fn_chain (arg
, n
- 1, gsi
, fn
, loc
, cache
);
1708 res
= build_and_insert_call (gsi
, loc
, fn
, prev
);
/* Print to STREAM the repeated application of function FNAME to ARG
   N times.  So, for FNAME = "foo", ARG = "x", N = 2 it would print:
   foo (foo (x)).  */
1720 print_nested_fn (FILE* stream
, const char *fname
, const char* arg
,
1724 fprintf (stream
, "%s", arg
);
1727 fprintf (stream
, "%s (", fname
);
1728 print_nested_fn (stream
, fname
, arg
, n
- 1);
1729 fprintf (stream
, ")");
1733 /* Print to STREAM the fractional sequence of sqrt chains
1734 applied to ARG, described by INFO. Used for the dump file. */
1737 dump_fractional_sqrt_sequence (FILE *stream
, const char *arg
,
1738 struct pow_synth_sqrt_info
*info
)
1740 for (unsigned int i
= 0; i
< info
->deepest
; i
++)
1742 bool is_set
= info
->factors
[i
];
1745 print_nested_fn (stream
, "sqrt", arg
, i
+ 1);
1746 if (i
!= info
->deepest
- 1)
1747 fprintf (stream
, " * ");
1752 /* Print to STREAM a representation of raising ARG to an integer
1753 power N. Used for the dump file. */
1756 dump_integer_part (FILE *stream
, const char* arg
, HOST_WIDE_INT n
)
1759 fprintf (stream
, "powi (%s, " HOST_WIDE_INT_PRINT_DEC
")", arg
, n
);
1761 fprintf (stream
, "%s", arg
);
1764 /* Attempt to synthesize a POW[F] (ARG0, ARG1) call using chains of
1765 square roots. Place at GSI and LOC. Limit the maximum depth
1766 of the sqrt chains to MAX_DEPTH. Return the tree holding the
1767 result of the expanded sequence or NULL_TREE if the expansion failed.
1769 This routine assumes that ARG1 is a real number with a fractional part
1770 (the integer exponent case will have been handled earlier in
1771 gimple_expand_builtin_pow).
1774 * For ARG1 composed of a whole part WHOLE_PART and a fractional part
1775 FRAC_PART i.e. WHOLE_PART == floor (ARG1) and
1776 FRAC_PART == ARG1 - WHOLE_PART:
1777 Produce POWI (ARG0, WHOLE_PART) * POW (ARG0, FRAC_PART) where
1778 POW (ARG0, FRAC_PART) is expanded as a product of square root chains
1779 if it can be expressed as such, that is if FRAC_PART satisfies:
1780 FRAC_PART == <SUM from i = 1 until MAX_DEPTH> (a[i] * (0.5**i))
1781 where integer a[i] is either 0 or 1.
1784 POW (x, 3.625) == POWI (x, 3) * POW (x, 0.625)
1785 --> POWI (x, 3) * SQRT (x) * SQRT (SQRT (SQRT (x)))
1787 For ARG1 < 0.0 there are two approaches:
1788 * (A) Expand to 1.0 / POW (ARG0, -ARG1) where POW (ARG0, -ARG1)
1789 is calculated as above.
1792 POW (x, -5.625) == 1.0 / POW (x, 5.625)
1793 --> 1.0 / (POWI (x, 5) * SQRT (x) * SQRT (SQRT (SQRT (x))))
1795 * (B) : WHOLE_PART := - ceil (abs (ARG1))
1796 FRAC_PART := ARG1 - WHOLE_PART
1797 and expand to POW (x, FRAC_PART) / POWI (x, WHOLE_PART).
1799 POW (x, -5.875) == POW (x, 0.125) / POWI (X, 6)
1800 --> SQRT (SQRT (SQRT (x))) / (POWI (x, 6))
1802 For ARG1 < 0.0 we choose between (A) and (B) depending on
1803 how many multiplications we'd have to do.
1804 So, for the example in (B): POW (x, -5.875), if we were to
1805 follow algorithm (A) we would produce:
1806 1.0 / POWI (X, 5) * SQRT (X) * SQRT (SQRT (X)) * SQRT (SQRT (SQRT (X)))
1807 which contains more multiplications than approach (B).
   Hopefully, this approach will eliminate potentially expensive POW library
   calls when unsafe floating point math is enabled and allow the compiler to
   further optimise the multiplies, square roots and divides produced by this
   expansion.  */
1815 expand_pow_as_sqrts (gimple_stmt_iterator
*gsi
, location_t loc
,
1816 tree arg0
, tree arg1
, HOST_WIDE_INT max_depth
)
1818 tree type
= TREE_TYPE (arg0
);
1819 machine_mode mode
= TYPE_MODE (type
);
1820 tree sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
1821 bool one_over
= true;
1826 if (TREE_CODE (arg1
) != REAL_CST
)
1829 REAL_VALUE_TYPE exp_init
= TREE_REAL_CST (arg1
);
1831 gcc_assert (max_depth
> 0);
1832 tree
*cache
= XALLOCAVEC (tree
, max_depth
+ 1);
1834 struct pow_synth_sqrt_info synth_info
;
1835 synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1836 synth_info
.deepest
= 0;
1837 synth_info
.num_mults
= 0;
1839 bool neg_exp
= REAL_VALUE_NEGATIVE (exp_init
);
1840 REAL_VALUE_TYPE exp
= real_value_abs (&exp_init
);
1842 /* The whole and fractional parts of exp. */
1843 REAL_VALUE_TYPE whole_part
;
1844 REAL_VALUE_TYPE frac_part
;
1846 real_floor (&whole_part
, mode
, &exp
);
1847 real_arithmetic (&frac_part
, MINUS_EXPR
, &exp
, &whole_part
);
1850 REAL_VALUE_TYPE ceil_whole
= dconst0
;
1851 REAL_VALUE_TYPE ceil_fract
= dconst0
;
1855 real_ceil (&ceil_whole
, mode
, &exp
);
1856 real_arithmetic (&ceil_fract
, MINUS_EXPR
, &ceil_whole
, &exp
);
1859 if (!representable_as_half_series_p (frac_part
, max_depth
, &synth_info
))
1862 /* Check whether it's more profitable to not use 1.0 / ... */
1865 struct pow_synth_sqrt_info alt_synth_info
;
1866 alt_synth_info
.factors
= XALLOCAVEC (bool, max_depth
+ 1);
1867 alt_synth_info
.deepest
= 0;
1868 alt_synth_info
.num_mults
= 0;
1870 if (representable_as_half_series_p (ceil_fract
, max_depth
,
1872 && alt_synth_info
.deepest
<= synth_info
.deepest
1873 && alt_synth_info
.num_mults
< synth_info
.num_mults
)
1875 whole_part
= ceil_whole
;
1876 frac_part
= ceil_fract
;
1877 synth_info
.deepest
= alt_synth_info
.deepest
;
1878 synth_info
.num_mults
= alt_synth_info
.num_mults
;
1879 memcpy (synth_info
.factors
, alt_synth_info
.factors
,
1880 (max_depth
+ 1) * sizeof (bool));
1885 HOST_WIDE_INT n
= real_to_integer (&whole_part
);
1886 REAL_VALUE_TYPE cint
;
1887 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
1889 if (!real_identical (&whole_part
, &cint
))
1892 if (powi_cost (n
) + synth_info
.num_mults
> POWI_MAX_MULTS
)
1895 memset (cache
, 0, (max_depth
+ 1) * sizeof (tree
));
1897 tree integer_res
= n
== 0 ? build_real (type
, dconst1
) : arg0
;
1899 /* Calculate the integer part of the exponent. */
1902 integer_res
= gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
1911 real_to_decimal (string
, &exp_init
, sizeof (string
), 0, 1);
1912 fprintf (dump_file
, "synthesizing pow (x, %s) as:\n", string
);
1918 fprintf (dump_file
, "1.0 / (");
1919 dump_integer_part (dump_file
, "x", n
);
1921 fprintf (dump_file
, " * ");
1922 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1923 fprintf (dump_file
, ")");
1927 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1928 fprintf (dump_file
, " / (");
1929 dump_integer_part (dump_file
, "x", n
);
1930 fprintf (dump_file
, ")");
1935 dump_fractional_sqrt_sequence (dump_file
, "x", &synth_info
);
1937 fprintf (dump_file
, " * ");
1938 dump_integer_part (dump_file
, "x", n
);
1941 fprintf (dump_file
, "\ndeepest sqrt chain: %d\n", synth_info
.deepest
);
1945 tree fract_res
= NULL_TREE
;
1948 /* Calculate the fractional part of the exponent. */
1949 for (unsigned i
= 0; i
< synth_info
.deepest
; i
++)
1951 if (synth_info
.factors
[i
])
1953 tree sqrt_chain
= get_fn_chain (arg0
, i
+ 1, gsi
, sqrtfn
, loc
, cache
);
1956 fract_res
= sqrt_chain
;
1959 fract_res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1960 fract_res
, sqrt_chain
);
1964 tree res
= NULL_TREE
;
1971 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1972 fract_res
, integer_res
);
1976 res
= build_and_insert_binop (gsi
, loc
, "powrootrecip", RDIV_EXPR
,
1977 build_real (type
, dconst1
), res
);
1981 res
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
1982 fract_res
, integer_res
);
1986 res
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
1987 fract_res
, integer_res
);
/* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
   with location info LOC.  If possible, create an equivalent and
   less expensive sequence of statements prior to GSI, and return an
   expression holding the result.  */
1997 gimple_expand_builtin_pow (gimple_stmt_iterator
*gsi
, location_t loc
,
1998 tree arg0
, tree arg1
)
2000 REAL_VALUE_TYPE c
, cint
, dconst1_3
, dconst1_4
, dconst1_6
;
2001 REAL_VALUE_TYPE c2
, dconst3
;
2003 tree type
, sqrtfn
, cbrtfn
, sqrt_arg0
, result
, cbrt_x
, powi_cbrt_x
;
2005 bool speed_p
= optimize_bb_for_speed_p (gsi_bb (*gsi
));
2006 bool hw_sqrt_exists
, c_is_int
, c2_is_int
;
2008 dconst1_4
= dconst1
;
2009 SET_REAL_EXP (&dconst1_4
, REAL_EXP (&dconst1_4
) - 2);
/* If the exponent isn't a constant, there's nothing of interest
   to be done.  */
2013 if (TREE_CODE (arg1
) != REAL_CST
)
2016 /* Don't perform the operation if flag_signaling_nans is on
2017 and the operand is a signaling NaN. */
2018 if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1
)))
2019 && ((TREE_CODE (arg0
) == REAL_CST
2020 && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg0
)))
2021 || REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1
))))
2024 /* If the exponent is equivalent to an integer, expand to an optimal
2025 multiplication sequence when profitable. */
2026 c
= TREE_REAL_CST (arg1
);
2027 n
= real_to_integer (&c
);
2028 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2029 c_is_int
= real_identical (&c
, &cint
);
2032 && ((n
>= -1 && n
<= 2)
2033 || (flag_unsafe_math_optimizations
2035 && powi_cost (n
) <= POWI_MAX_MULTS
)))
2036 return gimple_expand_builtin_powi (gsi
, loc
, arg0
, n
);
2038 /* Attempt various optimizations using sqrt and cbrt. */
2039 type
= TREE_TYPE (arg0
);
2040 mode
= TYPE_MODE (type
);
2041 sqrtfn
= mathfn_built_in (type
, BUILT_IN_SQRT
);
/* Optimize pow(x,0.5) = sqrt(x).  This replacement is always safe
   unless signed zeros must be maintained.  pow(-0,0.5) = +0, while
   sqrt(-0) = -0.  */
2047 && real_equal (&c
, &dconsthalf
)
2048 && !HONOR_SIGNED_ZEROS (mode
))
2049 return build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2051 hw_sqrt_exists
= optab_handler (sqrt_optab
, mode
) != CODE_FOR_nothing
;
2053 /* Optimize pow(x,1./3.) = cbrt(x). This requires unsafe math
2054 optimizations since 1./3. is not exactly representable. If x
2055 is negative and finite, the correct value of pow(x,1./3.) is
2056 a NaN with the "invalid" exception raised, because the value
2057 of 1./3. actually has an even denominator. The correct value
2058 of cbrt(x) is a negative real value. */
2059 cbrtfn
= mathfn_built_in (type
, BUILT_IN_CBRT
);
2060 dconst1_3
= real_value_truncate (mode
, dconst_third ());
2062 if (flag_unsafe_math_optimizations
2064 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2065 && real_equal (&c
, &dconst1_3
))
2066 return build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2068 /* Optimize pow(x,1./6.) = cbrt(sqrt(x)). Don't do this optimization
2069 if we don't have a hardware sqrt insn. */
2070 dconst1_6
= dconst1_3
;
2071 SET_REAL_EXP (&dconst1_6
, REAL_EXP (&dconst1_6
) - 1);
2073 if (flag_unsafe_math_optimizations
2076 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2079 && real_equal (&c
, &dconst1_6
))
2082 sqrt_arg0
= build_and_insert_call (gsi
, loc
, sqrtfn
, arg0
);
2085 return build_and_insert_call (gsi
, loc
, cbrtfn
, sqrt_arg0
);
  /* Attempt to expand the POW as a product of square root chains.
     Expand the 0.25 case even when optimising for size.  */
2091 if (flag_unsafe_math_optimizations
2094 && (speed_p
|| real_equal (&c
, &dconst1_4
))
2095 && !HONOR_SIGNED_ZEROS (mode
))
2097 unsigned int max_depth
= speed_p
2098 ? param_max_pow_sqrt_depth
2101 tree expand_with_sqrts
2102 = expand_pow_as_sqrts (gsi
, loc
, arg0
, arg1
, max_depth
);
2104 if (expand_with_sqrts
)
2105 return expand_with_sqrts
;
2108 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst2
);
2109 n
= real_to_integer (&c2
);
2110 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2111 c2_is_int
= real_identical (&c2
, &cint
);
2113 /* Optimize pow(x,c), where 3c = n for some nonzero integer n, into
2115 powi(x, n/3) * powi(cbrt(x), n%3), n > 0;
2116 1.0 / (powi(x, abs(n)/3) * powi(cbrt(x), abs(n)%3)), n < 0.
2118 Do not calculate the first factor when n/3 = 0. As cbrt(x) is
2119 different from pow(x, 1./3.) due to rounding and behavior with
2120 negative x, we need to constrain this transformation to unsafe
2121 math and positive x or finite math. */
2122 real_from_integer (&dconst3
, VOIDmode
, 3, SIGNED
);
2123 real_arithmetic (&c2
, MULT_EXPR
, &c
, &dconst3
);
2124 real_round (&c2
, mode
, &c2
);
2125 n
= real_to_integer (&c2
);
2126 real_from_integer (&cint
, VOIDmode
, n
, SIGNED
);
2127 real_arithmetic (&c2
, RDIV_EXPR
, &cint
, &dconst3
);
2128 real_convert (&c2
, mode
, &c2
);
2130 if (flag_unsafe_math_optimizations
2132 && (!HONOR_NANS (mode
) || tree_expr_nonnegative_p (arg0
))
2133 && real_identical (&c2
, &c
)
2135 && optimize_function_for_speed_p (cfun
)
2136 && powi_cost (n
/ 3) <= POWI_MAX_MULTS
)
2138 tree powi_x_ndiv3
= NULL_TREE
;
2140 /* Attempt to fold powi(arg0, abs(n/3)) into multiplies. If not
2141 possible or profitable, give up. Skip the degenerate case when
2142 abs(n) < 3, where the result is always 1. */
2143 if (absu_hwi (n
) >= 3)
2145 powi_x_ndiv3
= gimple_expand_builtin_powi (gsi
, loc
, arg0
,
2151 /* Calculate powi(cbrt(x), n%3). Don't use gimple_expand_builtin_powi
2152 as that creates an unnecessary variable. Instead, just produce
2153 either cbrt(x) or cbrt(x) * cbrt(x). */
2154 cbrt_x
= build_and_insert_call (gsi
, loc
, cbrtfn
, arg0
);
2156 if (absu_hwi (n
) % 3 == 1)
2157 powi_cbrt_x
= cbrt_x
;
2159 powi_cbrt_x
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2162 /* Multiply the two subexpressions, unless powi(x,abs(n)/3) = 1. */
2163 if (absu_hwi (n
) < 3)
2164 result
= powi_cbrt_x
;
2166 result
= build_and_insert_binop (gsi
, loc
, "powroot", MULT_EXPR
,
2167 powi_x_ndiv3
, powi_cbrt_x
);
2169 /* If n is negative, reciprocate the result. */
2171 result
= build_and_insert_binop (gsi
, loc
, "powroot", RDIV_EXPR
,
2172 build_real (type
, dconst1
), result
);
2177 /* No optimizations succeeded. */
2181 /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
2182 on the SSA_NAME argument of each of them. */
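/* Illustrative, source-level example (not part of GCC itself) of the sharing
   this pass looks for.  Assuming the C library provides sincos (or cexp for
   the complex case), the two calls below can be served by one computation:

     #include <math.h>

     void polar (double a, double *s, double *c)
     {
       *s = sin (a);   // same SSA_NAME argument as the cos call
       *c = cos (a);   // -> one cexpi/sincos, then REALPART/IMAGPART uses
     }
*/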
2186 const pass_data pass_data_cse_sincos
=
2188 GIMPLE_PASS
, /* type */
2189 "sincos", /* name */
2190 OPTGROUP_NONE
, /* optinfo_flags */
2191 TV_TREE_SINCOS
, /* tv_id */
2192 PROP_ssa
, /* properties_required */
2193 0, /* properties_provided */
2194 0, /* properties_destroyed */
2195 0, /* todo_flags_start */
2196 TODO_update_ssa
, /* todo_flags_finish */
2199 class pass_cse_sincos
: public gimple_opt_pass
2202 pass_cse_sincos (gcc::context
*ctxt
)
2203 : gimple_opt_pass (pass_data_cse_sincos
, ctxt
)
2206 /* opt_pass methods: */
2207 bool gate (function
*) final override
2212 unsigned int execute (function
*) final override
;
2214 }; // class pass_cse_sincos
2217 pass_cse_sincos::execute (function
*fun
)
2220 bool cfg_changed
= false;
2222 calculate_dominance_info (CDI_DOMINATORS
);
2223 memset (&sincos_stats
, 0, sizeof (sincos_stats
));
2225 FOR_EACH_BB_FN (bb
, fun
)
2227 gimple_stmt_iterator gsi
;
2229 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2231 gimple
*stmt
= gsi_stmt (gsi
);
2233 if (is_gimple_call (stmt
)
2234 && gimple_call_lhs (stmt
))
2237 switch (gimple_call_combined_fn (stmt
))
2242 arg
= gimple_call_arg (stmt
, 0);
2243 /* Make sure we have either sincos or cexp. */
2244 if (!targetm
.libc_has_function (function_c99_math_complex
,
2246 && !targetm
.libc_has_function (function_sincos
,
2250 if (TREE_CODE (arg
) == SSA_NAME
)
2251 cfg_changed
|= execute_cse_sincos_1 (arg
);
2260 statistics_counter_event (fun
, "sincos statements inserted",
2261 sincos_stats
.inserted
);
2262 statistics_counter_event (fun
, "conv statements removed",
2263 sincos_stats
.conv_removed
);
2265 return cfg_changed
? TODO_cleanup_cfg
: 0;
gimple_opt_pass *
make_pass_cse_sincos (gcc::context *ctxt)
{
  return new pass_cse_sincos (ctxt);
}
2276 /* Expand powi(x,n) into an optimal number of multiplies, when n is a
2280 const pass_data pass_data_expand_pow
=
2282 GIMPLE_PASS
, /* type */
2284 OPTGROUP_NONE
, /* optinfo_flags */
2285 TV_TREE_POW
, /* tv_id */
2286 PROP_ssa
, /* properties_required */
2287 PROP_gimple_opt_math
, /* properties_provided */
2288 0, /* properties_destroyed */
2289 0, /* todo_flags_start */
2290 TODO_update_ssa
, /* todo_flags_finish */
2293 class pass_expand_pow
: public gimple_opt_pass
2296 pass_expand_pow (gcc::context
*ctxt
)
2297 : gimple_opt_pass (pass_data_expand_pow
, ctxt
)
2300 /* opt_pass methods: */
2301 bool gate (function
*) final override
2306 unsigned int execute (function
*) final override
;
2308 }; // class pass_expand_pow
2311 pass_expand_pow::execute (function
*fun
)
2314 bool cfg_changed
= false;
2316 calculate_dominance_info (CDI_DOMINATORS
);
2318 FOR_EACH_BB_FN (bb
, fun
)
2320 gimple_stmt_iterator gsi
;
2321 bool cleanup_eh
= false;
2323 for (gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2325 gimple
*stmt
= gsi_stmt (gsi
);
2327 /* Only the last stmt in a bb could throw, no need to call
2328 gimple_purge_dead_eh_edges if we change something in the middle
2329 of a basic block. */
2332 if (is_gimple_call (stmt
)
2333 && gimple_call_lhs (stmt
))
2335 tree arg0
, arg1
, result
;
2339 switch (gimple_call_combined_fn (stmt
))
2342 arg0
= gimple_call_arg (stmt
, 0);
2343 arg1
= gimple_call_arg (stmt
, 1);
2345 loc
= gimple_location (stmt
);
2346 result
= gimple_expand_builtin_pow (&gsi
, loc
, arg0
, arg1
);
2350 tree lhs
= gimple_get_lhs (stmt
);
2351 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2352 gimple_set_location (new_stmt
, loc
);
2353 unlink_stmt_vdef (stmt
);
2354 gsi_replace (&gsi
, new_stmt
, true);
2356 if (gimple_vdef (stmt
))
2357 release_ssa_name (gimple_vdef (stmt
));
2362 arg0
= gimple_call_arg (stmt
, 0);
2363 arg1
= gimple_call_arg (stmt
, 1);
2364 loc
= gimple_location (stmt
);
2366 if (real_minus_onep (arg0
))
2368 tree t0
, t1
, cond
, one
, minus_one
;
2371 t0
= TREE_TYPE (arg0
);
2372 t1
= TREE_TYPE (arg1
);
2373 one
= build_real (t0
, dconst1
);
2374 minus_one
= build_real (t0
, dconstm1
);
2376 cond
= make_temp_ssa_name (t1
, NULL
, "powi_cond");
2377 stmt
= gimple_build_assign (cond
, BIT_AND_EXPR
,
2378 arg1
, build_int_cst (t1
, 1));
2379 gimple_set_location (stmt
, loc
);
2380 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2382 result
= make_temp_ssa_name (t0
, NULL
, "powi");
2383 stmt
= gimple_build_assign (result
, COND_EXPR
, cond
,
2385 gimple_set_location (stmt
, loc
);
2386 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
2390 if (!tree_fits_shwi_p (arg1
))
2393 n
= tree_to_shwi (arg1
);
2394 result
= gimple_expand_builtin_powi (&gsi
, loc
, arg0
, n
);
2399 tree lhs
= gimple_get_lhs (stmt
);
2400 gassign
*new_stmt
= gimple_build_assign (lhs
, result
);
2401 gimple_set_location (new_stmt
, loc
);
2402 unlink_stmt_vdef (stmt
);
2403 gsi_replace (&gsi
, new_stmt
, true);
2405 if (gimple_vdef (stmt
))
2406 release_ssa_name (gimple_vdef (stmt
));
2415 cfg_changed
|= gimple_purge_dead_eh_edges (bb
);
2418 return cfg_changed
? TODO_cleanup_cfg
: 0;
gimple_opt_pass *
make_pass_expand_pow (gcc::context *ctxt)
{
  return new pass_expand_pow (ctxt);
}
2429 /* Return true if stmt is a type conversion operation that can be stripped
2430 when used in a widening multiply operation. */
2432 widening_mult_conversion_strippable_p (tree result_type
, gimple
*stmt
)
2434 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
2436 if (TREE_CODE (result_type
) == INTEGER_TYPE
)
2441 if (!CONVERT_EXPR_CODE_P (rhs_code
))
2444 op_type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2446 /* If the type of OP has the same precision as the result, then
2447 we can strip this conversion. The multiply operation will be
2448 selected to create the correct extension as a by-product. */
2449 if (TYPE_PRECISION (result_type
) == TYPE_PRECISION (op_type
))
2452 /* We can also strip a conversion if it preserves the signed-ness of
2453 the operation and doesn't narrow the range. */
2454 inner_op_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
2456 /* If the inner-most type is unsigned, then we can strip any
2457 intermediate widening operation. If it's signed, then the
2458 intermediate widening operation must also be signed. */
2459 if ((TYPE_UNSIGNED (inner_op_type
)
2460 || TYPE_UNSIGNED (op_type
) == TYPE_UNSIGNED (inner_op_type
))
2461 && TYPE_PRECISION (op_type
) > TYPE_PRECISION (inner_op_type
))
2467 return rhs_code
== FIXED_CONVERT_EXPR
;
2470 /* Return true if RHS is a suitable operand for a widening multiplication,
2471 assuming a target type of TYPE.
2472 There are two cases:
2474 - RHS makes some value at least twice as wide. Store that value
2475 in *NEW_RHS_OUT if so, and store its type in *TYPE_OUT.
2477 - RHS is an integer constant. Store that value in *NEW_RHS_OUT if so,
2478 but leave *TYPE_OUT untouched. */
2481 is_widening_mult_rhs_p (tree type
, tree rhs
, tree
*type_out
,
2487 if (TREE_CODE (rhs
) == SSA_NAME
)
2489 /* Use tree_non_zero_bits to see if this operand is zero_extended
2490 for unsigned widening multiplications or non-negative for
2491 signed widening multiplications. */
2492 if (TREE_CODE (type
) == INTEGER_TYPE
2493 && (TYPE_PRECISION (type
) & 1) == 0
2494 && int_mode_for_size (TYPE_PRECISION (type
) / 2, 1).exists ())
2496 unsigned int prec
= TYPE_PRECISION (type
);
2497 unsigned int hprec
= prec
/ 2;
2498 wide_int bits
= wide_int::from (tree_nonzero_bits (rhs
), prec
,
2499 TYPE_SIGN (TREE_TYPE (rhs
)));
2500 if (TYPE_UNSIGNED (type
)
2501 && wi::bit_and (bits
, wi::mask (hprec
, true, prec
)) == 0)
2503 *type_out
= build_nonstandard_integer_type (hprec
, true);
2504 /* X & MODE_MASK can be simplified to (T)X. */
2505 stmt
= SSA_NAME_DEF_STMT (rhs
);
2506 if (is_gimple_assign (stmt
)
2507 && gimple_assign_rhs_code (stmt
) == BIT_AND_EXPR
2508 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == INTEGER_CST
2509 && wide_int::from (wi::to_wide (gimple_assign_rhs2 (stmt
)),
2510 prec
, TYPE_SIGN (TREE_TYPE (rhs
)))
2511 == wi::mask (hprec
, false, prec
))
2512 *new_rhs_out
= gimple_assign_rhs1 (stmt
);
2517 else if (!TYPE_UNSIGNED (type
)
2518 && wi::bit_and (bits
, wi::mask (hprec
- 1, true, prec
)) == 0)
2520 *type_out
= build_nonstandard_integer_type (hprec
, false);
2526 stmt
= SSA_NAME_DEF_STMT (rhs
);
2527 if (is_gimple_assign (stmt
))
2530 if (widening_mult_conversion_strippable_p (type
, stmt
))
2532 rhs1
= gimple_assign_rhs1 (stmt
);
2534 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2536 *new_rhs_out
= rhs1
;
2547 type1
= TREE_TYPE (rhs1
);
2549 if (TREE_CODE (type1
) != TREE_CODE (type
)
2550 || TYPE_PRECISION (type1
) * 2 > TYPE_PRECISION (type
))
2553 *new_rhs_out
= rhs1
;
2558 if (TREE_CODE (rhs
) == INTEGER_CST
)
2568 /* Return true if STMT performs a widening multiplication, assuming the
2569 output type is TYPE. If so, store the unwidened types of the operands
2570 in *TYPE1_OUT and *TYPE2_OUT respectively. Also fill *RHS1_OUT and
2571 *RHS2_OUT such that converting those operands to types *TYPE1_OUT
2572 and *TYPE2_OUT would give the operands of the multiplication. */
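/* Illustrative, source-level example (not part of GCC itself) of a widening
   multiplication that satisfies is_widening_mult_p: both operands are at
   most half the width of the result.

     #include <stdint.h>

     int64_t wmul (int32_t a, int32_t b)
     {
       return (int64_t) a * (int64_t) b;   // a single WIDEN_MULT_EXPR
     }

   With mixed signedness (e.g. uint32_t by int32_t) the usmul variant of the
   widening-multiply optab is tried instead.  */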
2575 is_widening_mult_p (gimple
*stmt
,
2576 tree
*type1_out
, tree
*rhs1_out
,
2577 tree
*type2_out
, tree
*rhs2_out
)
2579 tree type
= TREE_TYPE (gimple_assign_lhs (stmt
));
2581 if (TREE_CODE (type
) == INTEGER_TYPE
)
2583 if (TYPE_OVERFLOW_TRAPS (type
))
2586 else if (TREE_CODE (type
) != FIXED_POINT_TYPE
)
2589 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs1 (stmt
), type1_out
,
2593 if (!is_widening_mult_rhs_p (type
, gimple_assign_rhs2 (stmt
), type2_out
,
2597 if (*type1_out
== NULL
)
2599 if (*type2_out
== NULL
|| !int_fits_type_p (*rhs1_out
, *type2_out
))
2601 *type1_out
= *type2_out
;
2604 if (*type2_out
== NULL
)
2606 if (!int_fits_type_p (*rhs2_out
, *type1_out
))
2608 *type2_out
= *type1_out
;
2611 /* Ensure that the larger of the two operands comes first. */
2612 if (TYPE_PRECISION (*type1_out
) < TYPE_PRECISION (*type2_out
))
2614 std::swap (*type1_out
, *type2_out
);
2615 std::swap (*rhs1_out
, *rhs2_out
);
/* Check to see if the CALL statement is an invocation of copysign
   with 1. being the first argument.  */

static bool
is_copysign_call_with_1 (gimple *call)
{
  gcall *c = dyn_cast <gcall *> (call);
  if (! c)
    return false;

  enum combined_fn code = gimple_call_combined_fn (c);

  if (code == CFN_LAST)
    return false;

  if (builtin_fn_p (code))
    {
      switch (as_builtin_fn (code))
	{
	CASE_FLT_FN (BUILT_IN_COPYSIGN):
	CASE_FLT_FN_FLOATN_NX (BUILT_IN_COPYSIGN):
	  return real_onep (gimple_call_arg (c, 0));
	default:
	  return false;
	}
    }

  if (internal_fn_p (code))
    {
      switch (as_internal_fn (code))
	{
	case IFN_COPYSIGN:
	  return real_onep (gimple_call_arg (c, 0));
	default:
	  return false;
	}
    }

  return false;
}
/* Try to expand the pattern x * copysign (1, y) into xorsign (x, y).
   This is only done when the xorsign optab is defined; if the pattern
   is not an xorsign pattern or if expansion fails, FALSE is returned,
   otherwise TRUE.  */
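/* Illustrative, source-level example (not part of GCC itself) of the pattern
   handled here, assuming the target defines the xorsign optab:

     double apply_sign (double x, double y)
     {
       return x * __builtin_copysign (1.0, y);   // -> xorsign (x, y)
     }

   Multiplying by copysign (1, y) only transfers the sign bit of y, which
   xorsign implements as a single XOR of the sign bits.  */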
2666 convert_expand_mult_copysign (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2668 tree treeop0
, treeop1
, lhs
, type
;
2669 location_t loc
= gimple_location (stmt
);
2670 lhs
= gimple_assign_lhs (stmt
);
2671 treeop0
= gimple_assign_rhs1 (stmt
);
2672 treeop1
= gimple_assign_rhs2 (stmt
);
2673 type
= TREE_TYPE (lhs
);
2674 machine_mode mode
= TYPE_MODE (type
);
2676 if (HONOR_SNANS (type
))
2679 if (TREE_CODE (treeop0
) == SSA_NAME
&& TREE_CODE (treeop1
) == SSA_NAME
)
2681 gimple
*call0
= SSA_NAME_DEF_STMT (treeop0
);
2682 if (!has_single_use (treeop0
) || !is_copysign_call_with_1 (call0
))
2684 call0
= SSA_NAME_DEF_STMT (treeop1
);
2685 if (!has_single_use (treeop1
) || !is_copysign_call_with_1 (call0
))
2690 if (optab_handler (xorsign_optab
, mode
) == CODE_FOR_nothing
)
2693 gcall
*c
= as_a
<gcall
*> (call0
);
2694 treeop0
= gimple_call_arg (c
, 1);
2697 = gimple_build_call_internal (IFN_XORSIGN
, 2, treeop1
, treeop0
);
2698 gimple_set_lhs (call_stmt
, lhs
);
2699 gimple_set_location (call_stmt
, loc
);
2700 gsi_replace (gsi
, call_stmt
, true);
2707 /* Process a single gimple statement STMT, which has a MULT_EXPR as
2708 its rhs, and try to convert it into a WIDEN_MULT_EXPR. The return
2709 value is true iff we converted the statement. */
2712 convert_mult_to_widen (gimple
*stmt
, gimple_stmt_iterator
*gsi
)
2714 tree lhs
, rhs1
, rhs2
, type
, type1
, type2
;
2715 enum insn_code handler
;
2716 scalar_int_mode to_mode
, from_mode
, actual_mode
;
2718 int actual_precision
;
2719 location_t loc
= gimple_location (stmt
);
2720 bool from_unsigned1
, from_unsigned2
;
2722 lhs
= gimple_assign_lhs (stmt
);
2723 type
= TREE_TYPE (lhs
);
2724 if (TREE_CODE (type
) != INTEGER_TYPE
)
2727 if (!is_widening_mult_p (stmt
, &type1
, &rhs1
, &type2
, &rhs2
))
  /* If any one of rhs1 and rhs2 is subject to abnormal coalescing,
     avoid the transform.  */
2732 if ((TREE_CODE (rhs1
) == SSA_NAME
2733 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs1
))
2734 || (TREE_CODE (rhs2
) == SSA_NAME
2735 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (rhs2
)))
2738 to_mode
= SCALAR_INT_TYPE_MODE (type
);
2739 from_mode
= SCALAR_INT_TYPE_MODE (type1
);
2740 if (to_mode
== from_mode
)
2743 from_unsigned1
= TYPE_UNSIGNED (type1
);
2744 from_unsigned2
= TYPE_UNSIGNED (type2
);
2746 if (from_unsigned1
&& from_unsigned2
)
2747 op
= umul_widen_optab
;
2748 else if (!from_unsigned1
&& !from_unsigned2
)
2749 op
= smul_widen_optab
;
2751 op
= usmul_widen_optab
;
2753 handler
= find_widening_optab_handler_and_mode (op
, to_mode
, from_mode
,
2756 if (handler
== CODE_FOR_nothing
)
2758 if (op
!= smul_widen_optab
)
2760 /* We can use a signed multiply with unsigned types as long as
2761 there is a wider mode to use, or it is the smaller of the two
2762 types that is unsigned. Note that type1 >= type2, always. */
2763 if ((TYPE_UNSIGNED (type1
)
2764 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2765 || (TYPE_UNSIGNED (type2
)
2766 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2768 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2769 || GET_MODE_SIZE (to_mode
) <= GET_MODE_SIZE (from_mode
))
2773 op
= smul_widen_optab
;
2774 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2778 if (handler
== CODE_FOR_nothing
)
2781 from_unsigned1
= from_unsigned2
= false;
2785 /* Expand can synthesize smul_widen_optab if the target
2786 supports umul_widen_optab. */
2787 op
= umul_widen_optab
;
2788 handler
= find_widening_optab_handler_and_mode (op
, to_mode
,
2791 if (handler
== CODE_FOR_nothing
)
  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
2798 actual_precision
= GET_MODE_PRECISION (actual_mode
);
2799 if (2 * actual_precision
> TYPE_PRECISION (type
))
2801 if (actual_precision
!= TYPE_PRECISION (type1
)
2802 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
2803 type1
= build_nonstandard_integer_type (actual_precision
, from_unsigned1
);
2804 if (!useless_type_conversion_p (type1
, TREE_TYPE (rhs1
)))
2806 if (TREE_CODE (rhs1
) == INTEGER_CST
)
2807 rhs1
= fold_convert (type1
, rhs1
);
2809 rhs1
= build_and_insert_cast (gsi
, loc
, type1
, rhs1
);
2811 if (actual_precision
!= TYPE_PRECISION (type2
)
2812 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
2813 type2
= build_nonstandard_integer_type (actual_precision
, from_unsigned2
);
2814 if (!useless_type_conversion_p (type2
, TREE_TYPE (rhs2
)))
2816 if (TREE_CODE (rhs2
) == INTEGER_CST
)
2817 rhs2
= fold_convert (type2
, rhs2
);
2819 rhs2
= build_and_insert_cast (gsi
, loc
, type2
, rhs2
);
2822 gimple_assign_set_rhs1 (stmt
, rhs1
);
2823 gimple_assign_set_rhs2 (stmt
, rhs2
);
2824 gimple_assign_set_rhs_code (stmt
, WIDEN_MULT_EXPR
);
2826 widen_mul_stats
.widen_mults_inserted
++;
/* Process a single gimple statement STMT, which is found at the
   iterator GSI and has either a PLUS_EXPR or a MINUS_EXPR as its
   rhs (given by CODE), and try to convert it into a
   WIDEN_MULT_PLUS_EXPR or a WIDEN_MULT_MINUS_EXPR.  The return value
   is true iff we converted the statement.  */
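/* Illustrative, source-level example (not part of GCC itself) of the shape
   that becomes a WIDEN_MULT_PLUS_EXPR when the target provides a widening
   multiply-and-accumulate instruction:

     #include <stdint.h>

     int64_t mac (int64_t acc, int32_t a, int32_t b)
     {
       return acc + (int64_t) a * b;   // widening multiply-accumulate
     }
*/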
2837 convert_plusminus_to_widen (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
2838 enum tree_code code
)
2840 gimple
*rhs1_stmt
= NULL
, *rhs2_stmt
= NULL
;
2841 gimple
*conv1_stmt
= NULL
, *conv2_stmt
= NULL
, *conv_stmt
;
2842 tree type
, type1
, type2
, optype
;
2843 tree lhs
, rhs1
, rhs2
, mult_rhs1
, mult_rhs2
, add_rhs
;
2844 enum tree_code rhs1_code
= ERROR_MARK
, rhs2_code
= ERROR_MARK
;
2846 enum tree_code wmult_code
;
2847 enum insn_code handler
;
2848 scalar_mode to_mode
, from_mode
, actual_mode
;
2849 location_t loc
= gimple_location (stmt
);
2850 int actual_precision
;
2851 bool from_unsigned1
, from_unsigned2
;
2853 lhs
= gimple_assign_lhs (stmt
);
2854 type
= TREE_TYPE (lhs
);
2855 if ((TREE_CODE (type
) != INTEGER_TYPE
2856 && TREE_CODE (type
) != FIXED_POINT_TYPE
)
2857 || !type_has_mode_precision_p (type
))
2860 if (code
== MINUS_EXPR
)
2861 wmult_code
= WIDEN_MULT_MINUS_EXPR
;
2863 wmult_code
= WIDEN_MULT_PLUS_EXPR
;
2865 rhs1
= gimple_assign_rhs1 (stmt
);
2866 rhs2
= gimple_assign_rhs2 (stmt
);
2868 if (TREE_CODE (rhs1
) == SSA_NAME
)
2870 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2871 if (is_gimple_assign (rhs1_stmt
))
2872 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2875 if (TREE_CODE (rhs2
) == SSA_NAME
)
2877 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2878 if (is_gimple_assign (rhs2_stmt
))
2879 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
  /* Allow for one conversion statement between the multiply
     and addition/subtraction statement.  If there is more than
     one conversion then we assume they would invalidate this
     transformation.  If that's not the case then they should have
     been folded before now.  */
2887 if (CONVERT_EXPR_CODE_P (rhs1_code
))
2889 conv1_stmt
= rhs1_stmt
;
2890 rhs1
= gimple_assign_rhs1 (rhs1_stmt
);
2891 if (TREE_CODE (rhs1
) == SSA_NAME
)
2893 rhs1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
2894 if (is_gimple_assign (rhs1_stmt
))
2895 rhs1_code
= gimple_assign_rhs_code (rhs1_stmt
);
2900 if (CONVERT_EXPR_CODE_P (rhs2_code
))
2902 conv2_stmt
= rhs2_stmt
;
2903 rhs2
= gimple_assign_rhs1 (rhs2_stmt
);
2904 if (TREE_CODE (rhs2
) == SSA_NAME
)
2906 rhs2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
2907 if (is_gimple_assign (rhs2_stmt
))
2908 rhs2_code
= gimple_assign_rhs_code (rhs2_stmt
);
  /* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
     is_widening_mult_p, but we still need the rhs values it returns.

     It might also appear that it would be sufficient to use the existing
     operands of the widening multiply, but that would limit the choice of
     multiply-and-accumulate instructions.

     If the widened-multiplication result has more than one use, it is
     probably wiser not to do the conversion.  Also restrict this operation
     to a single basic block to avoid moving the multiply to a different
     block with a higher execution frequency.  */
2925 if (code
== PLUS_EXPR
2926 && (rhs1_code
== MULT_EXPR
|| rhs1_code
== WIDEN_MULT_EXPR
))
2928 if (!has_single_use (rhs1
)
2929 || gimple_bb (rhs1_stmt
) != gimple_bb (stmt
)
2930 || !is_widening_mult_p (rhs1_stmt
, &type1
, &mult_rhs1
,
2931 &type2
, &mult_rhs2
))
2934 conv_stmt
= conv1_stmt
;
2936 else if (rhs2_code
== MULT_EXPR
|| rhs2_code
== WIDEN_MULT_EXPR
)
2938 if (!has_single_use (rhs2
)
2939 || gimple_bb (rhs2_stmt
) != gimple_bb (stmt
)
2940 || !is_widening_mult_p (rhs2_stmt
, &type1
, &mult_rhs1
,
2941 &type2
, &mult_rhs2
))
2944 conv_stmt
= conv2_stmt
;
2949 to_mode
= SCALAR_TYPE_MODE (type
);
2950 from_mode
= SCALAR_TYPE_MODE (type1
);
2951 if (to_mode
== from_mode
)
2954 from_unsigned1
= TYPE_UNSIGNED (type1
);
2955 from_unsigned2
= TYPE_UNSIGNED (type2
);
2958 /* There's no such thing as a mixed sign madd yet, so use a wider mode. */
2959 if (from_unsigned1
!= from_unsigned2
)
2961 if (!INTEGRAL_TYPE_P (type
))
2963 /* We can use a signed multiply with unsigned types as long as
2964 there is a wider mode to use, or it is the smaller of the two
2965 types that is unsigned. Note that type1 >= type2, always. */
2967 && TYPE_PRECISION (type1
) == GET_MODE_PRECISION (from_mode
))
2969 && TYPE_PRECISION (type2
) == GET_MODE_PRECISION (from_mode
)))
2971 if (!GET_MODE_WIDER_MODE (from_mode
).exists (&from_mode
)
2972 || GET_MODE_SIZE (from_mode
) >= GET_MODE_SIZE (to_mode
))
2976 from_unsigned1
= from_unsigned2
= false;
2977 optype
= build_nonstandard_integer_type (GET_MODE_PRECISION (from_mode
),
  /* If there was a conversion between the multiply and addition
     then we need to make sure it fits a multiply-and-accumulate.
     There should be a single mode change which does not change the
     value.  */
2987 /* We use the original, unmodified data types for this. */
2988 tree from_type
= TREE_TYPE (gimple_assign_rhs1 (conv_stmt
));
2989 tree to_type
= TREE_TYPE (gimple_assign_lhs (conv_stmt
));
2990 int data_size
= TYPE_PRECISION (type1
) + TYPE_PRECISION (type2
);
2991 bool is_unsigned
= TYPE_UNSIGNED (type1
) && TYPE_UNSIGNED (type2
);
2993 if (TYPE_PRECISION (from_type
) > TYPE_PRECISION (to_type
))
2995 /* Conversion is a truncate. */
2996 if (TYPE_PRECISION (to_type
) < data_size
)
2999 else if (TYPE_PRECISION (from_type
) < TYPE_PRECISION (to_type
))
3001 /* Conversion is an extend. Check it's the right sort. */
3002 if (TYPE_UNSIGNED (from_type
) != is_unsigned
3003 && !(is_unsigned
&& TYPE_PRECISION (from_type
) > data_size
))
3006 /* else convert is a no-op for our purposes. */
3009 /* Verify that the machine can perform a widening multiply
3010 accumulate in this mode/signedness combination, otherwise
3011 this transformation is likely to pessimize code. */
3012 this_optab
= optab_for_tree_code (wmult_code
, optype
, optab_default
);
3013 handler
= find_widening_optab_handler_and_mode (this_optab
, to_mode
,
3014 from_mode
, &actual_mode
);
3016 if (handler
== CODE_FOR_nothing
)
  /* Ensure that the inputs to the handler are in the correct precision
     for the opcode.  This will be the full mode size.  */
3021 actual_precision
= GET_MODE_PRECISION (actual_mode
);
3022 if (actual_precision
!= TYPE_PRECISION (type1
)
3023 || from_unsigned1
!= TYPE_UNSIGNED (type1
))
3024 type1
= build_nonstandard_integer_type (actual_precision
, from_unsigned1
);
3025 if (!useless_type_conversion_p (type1
, TREE_TYPE (mult_rhs1
)))
3027 if (TREE_CODE (mult_rhs1
) == INTEGER_CST
)
3028 mult_rhs1
= fold_convert (type1
, mult_rhs1
);
3030 mult_rhs1
= build_and_insert_cast (gsi
, loc
, type1
, mult_rhs1
);
3032 if (actual_precision
!= TYPE_PRECISION (type2
)
3033 || from_unsigned2
!= TYPE_UNSIGNED (type2
))
3034 type2
= build_nonstandard_integer_type (actual_precision
, from_unsigned2
);
3035 if (!useless_type_conversion_p (type2
, TREE_TYPE (mult_rhs2
)))
3037 if (TREE_CODE (mult_rhs2
) == INTEGER_CST
)
3038 mult_rhs2
= fold_convert (type2
, mult_rhs2
);
3040 mult_rhs2
= build_and_insert_cast (gsi
, loc
, type2
, mult_rhs2
);
3043 if (!useless_type_conversion_p (type
, TREE_TYPE (add_rhs
)))
3044 add_rhs
= build_and_insert_cast (gsi
, loc
, type
, add_rhs
);
3046 gimple_assign_set_rhs_with_ops (gsi
, wmult_code
, mult_rhs1
, mult_rhs2
,
3048 update_stmt (gsi_stmt (*gsi
));
3049 widen_mul_stats
.maccs_inserted
++;
3053 /* Given a result MUL_RESULT which is a result of a multiplication of OP1 and
3054 OP2 and which we know is used in statements that can be, together with the
3055 multiplication, converted to FMAs, perform the transformation. */
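/* Illustrative, source-level example (not part of GCC itself) of the FMA
   family generated here, assuming -ffp-contract=fast and IFN_FMA support:

     double fma_  (double a, double b, double c) { return a * b + c; }     // FMA
     double fms_  (double a, double b, double c) { return a * b - c; }     // FMS
     double fnma_ (double a, double b, double c) { return c - a * b; }     // FNMA
     double fnms_ (double a, double b, double c) { return -(a * b) - c; }  // FNMS

   The negated variants are reached by folding the NEGATE_EXPRs handled
   below into the FMA call.  */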
3058 convert_mult_to_fma_1 (tree mul_result
, tree op1
, tree op2
)
3060 tree type
= TREE_TYPE (mul_result
);
3062 imm_use_iterator imm_iter
;
3065 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, mul_result
)
3067 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
3068 tree addop
, mulop1
= op1
, result
= mul_result
;
3069 bool negate_p
= false;
3070 gimple_seq seq
= NULL
;
3072 if (is_gimple_debug (use_stmt
))
3075 if (is_gimple_assign (use_stmt
)
3076 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3078 result
= gimple_assign_lhs (use_stmt
);
3079 use_operand_p use_p
;
3080 gimple
*neguse_stmt
;
3081 single_imm_use (gimple_assign_lhs (use_stmt
), &use_p
, &neguse_stmt
);
3082 gsi_remove (&gsi
, true);
3083 release_defs (use_stmt
);
3085 use_stmt
= neguse_stmt
;
3086 gsi
= gsi_for_stmt (use_stmt
);
3090 tree cond
, else_value
, ops
[3], len
, bias
;
3092 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
,
3096 addop
= ops
[0] == result
? ops
[1] : ops
[0];
3098 if (code
== MINUS_EXPR
)
3100 if (ops
[0] == result
)
3101 /* a * b - c -> a * b + (-c) */
3102 addop
= gimple_build (&seq
, NEGATE_EXPR
, type
, addop
);
3104 /* a - b * c -> (-b) * c + a */
3105 negate_p
= !negate_p
;
3109 mulop1
= gimple_build (&seq
, NEGATE_EXPR
, type
, mulop1
);
3112 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
3116 = gimple_build_call_internal (IFN_COND_LEN_FMA
, 7, cond
, mulop1
, op2
,
3117 addop
, else_value
, len
, bias
);
3119 fma_stmt
= gimple_build_call_internal (IFN_COND_FMA
, 5, cond
, mulop1
,
3120 op2
, addop
, else_value
);
3122 fma_stmt
= gimple_build_call_internal (IFN_FMA
, 3, mulop1
, op2
, addop
);
3123 gimple_set_lhs (fma_stmt
, gimple_get_lhs (use_stmt
));
3124 gimple_call_set_nothrow (fma_stmt
, !stmt_can_throw_internal (cfun
,
3126 gsi_replace (&gsi
, fma_stmt
, true);
3127 /* Follow all SSA edges so that we generate FMS, FNMA and FNMS
3128 regardless of where the negation occurs. */
3129 gimple
*orig_stmt
= gsi_stmt (gsi
);
3130 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3132 if (maybe_clean_or_replace_eh_stmt (orig_stmt
, gsi_stmt (gsi
)))
3134 update_stmt (gsi_stmt (gsi
));
3137 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3139 fprintf (dump_file
, "Generated FMA ");
3140 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3141 fprintf (dump_file
, "\n");
      /* If the FMA result is negated in a single use, fold the negation
	 too.  */
3146 orig_stmt
= gsi_stmt (gsi
);
3147 use_operand_p use_p
;
3149 if (is_gimple_call (orig_stmt
)
3150 && gimple_call_internal_p (orig_stmt
)
3151 && gimple_call_lhs (orig_stmt
)
3152 && TREE_CODE (gimple_call_lhs (orig_stmt
)) == SSA_NAME
3153 && single_imm_use (gimple_call_lhs (orig_stmt
), &use_p
, &neg_stmt
)
3154 && is_gimple_assign (neg_stmt
)
3155 && gimple_assign_rhs_code (neg_stmt
) == NEGATE_EXPR
3156 && !stmt_could_throw_p (cfun
, neg_stmt
))
3158 gsi
= gsi_for_stmt (neg_stmt
);
3159 if (fold_stmt (&gsi
, follow_all_ssa_edges
))
3161 if (maybe_clean_or_replace_eh_stmt (neg_stmt
, gsi_stmt (gsi
)))
3163 update_stmt (gsi_stmt (gsi
));
3164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3166 fprintf (dump_file
, "Folded FMA negation ");
3167 print_gimple_stmt (dump_file
, gsi_stmt (gsi
), 0, TDF_NONE
);
3168 fprintf (dump_file
, "\n");
3173 widen_mul_stats
.fmas_inserted
++;
3177 /* Data necessary to perform the actual transformation from a multiplication
3178 and an addition to an FMA after decision is taken it should be done and to
3179 then delete the multiplication statement from the function IL. */
3181 struct fma_transformation_info
3189 /* Structure containing the current state of FMA deferring, i.e. whether we are
3190 deferring, whether to continue deferring, and all data necessary to come
3191 back and perform all deferred transformations. */
3193 class fma_deferring_state
3196 /* Class constructor. Pass true as PERFORM_DEFERRING in order to actually
3197 do any deferring. */
3199 fma_deferring_state (bool perform_deferring
)
3200 : m_candidates (), m_mul_result_set (), m_initial_phi (NULL
),
3201 m_last_result (NULL_TREE
), m_deferring_p (perform_deferring
) {}
  /* List of FMA candidates for which the transformation has been determined
     to be possible but which, at this point in the BB analysis, we do not
     yet consider beneficial.  */
  auto_vec<fma_transformation_info, 8> m_candidates;
3208 /* Set of results of multiplication that are part of an already deferred FMA
3210 hash_set
<tree
> m_mul_result_set
;
  /* The PHI that supposedly feeds back the result of one FMA to another over
     a loop boundary.  */
  gphi *m_initial_phi;
3216 /* Result of the last produced FMA candidate or NULL if there has not been
3220 /* If true, deferring might still be profitable. If false, transform all
3221 candidates and no longer defer. */
/* Transform all deferred FMA candidates and mark STATE as no longer
   deferring.  */

static void
cancel_fma_deferring (fma_deferring_state *state)
{
  if (!state->m_deferring_p)
    return;

  for (unsigned i = 0; i < state->m_candidates.length (); i++)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Generating deferred FMA\n");

      const fma_transformation_info &fti = state->m_candidates[i];
      convert_mult_to_fma_1 (fti.mul_result, fti.op1, fti.op2);

      gimple_stmt_iterator gsi = gsi_for_stmt (fti.mul_stmt);
      gsi_remove (&gsi, true);
      release_defs (fti.mul_stmt);
    }
  state->m_deferring_p = false;
}
/* If OP is an SSA name defined by a PHI node, return the PHI statement.
   Otherwise return NULL.  */

static gphi *
result_of_phi (tree op)
{
  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  return dyn_cast <gphi *> (SSA_NAME_DEF_STMT (op));
}
/* After processing statements of a BB and recording STATE, return true if the
   initial phi is fed by the last FMA candidate result or one such result from
   previously processed BBs marked in LAST_RESULT_SET.  */
3266 last_fma_candidate_feeds_initial_phi (fma_deferring_state
*state
,
3267 hash_set
<tree
> *last_result_set
)
3271 FOR_EACH_PHI_ARG (use
, state
->m_initial_phi
, iter
, SSA_OP_USE
)
3273 tree t
= USE_FROM_PTR (use
);
3274 if (t
== state
->m_last_result
3275 || last_result_set
->contains (t
))
3282 /* Combine the multiplication at MUL_STMT with operands MULOP1 and MULOP2
3283 with uses in additions and subtractions to form fused multiply-add
3284 operations. Returns true if successful and MUL_STMT should be removed.
3285 If MUL_COND is nonnull, the multiplication in MUL_STMT is conditional
3286 on MUL_COND, otherwise it is unconditional.
3288 If STATE indicates that we are deferring FMA transformation, that means
3289 that we do not produce FMAs for basic blocks which look like:
3292 # accumulator_111 = PHI <0.0(5), accumulator_66(6)>
3294 accumulator_66 = _65 + accumulator_111;
3296 or its unrolled version, i.e. with several FMA candidates that feed result
3297 of one into the addend of another. Instead, we add them to a list in STATE
3298 and if we later discover an FMA candidate that is not part of such a chain,
3299 we go back and perform all deferred past candidates. */
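/* Illustrative, source-level example (not part of GCC itself) of the
   accumulation chain the deferring logic is about, relevant on targets
   where param_avoid_fma_max_bits covers the mode:

     double dot (const double *a, const double *b, int n)
     {
       double acc = 0.0;
       for (int i = 0; i < n; i++)
	 acc += a[i] * b[i];   // each candidate feeds the next one's addend
       return acc;
     }

   Forming FMAs here puts every multiply on the loop-carried latency chain
   (the multiplies could otherwise run in parallel with the previous add),
   so such candidates are deferred and only emitted if a candidate outside
   the chain turns up.  */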
3302 convert_mult_to_fma (gimple
*mul_stmt
, tree op1
, tree op2
,
3303 fma_deferring_state
*state
, tree mul_cond
= NULL_TREE
,
3304 tree mul_len
= NULL_TREE
, tree mul_bias
= NULL_TREE
)
3306 tree mul_result
= gimple_get_lhs (mul_stmt
);
3307 /* If there isn't a LHS then this can't be an FMA. There can be no LHS
3308 if the statement was left just for the side-effects. */
3311 tree type
= TREE_TYPE (mul_result
);
3312 gimple
*use_stmt
, *neguse_stmt
;
3313 use_operand_p use_p
;
3314 imm_use_iterator imm_iter
;
3316 if (FLOAT_TYPE_P (type
)
3317 && flag_fp_contract_mode
!= FP_CONTRACT_FAST
)
3320 /* We don't want to do bitfield reduction ops. */
3321 if (INTEGRAL_TYPE_P (type
)
3322 && (!type_has_mode_precision_p (type
) || TYPE_OVERFLOW_TRAPS (type
)))
3325 /* If the target doesn't support it, don't generate it. We assume that
3326 if fma isn't available then fms, fnma or fnms are not either. */
3327 optimization_type opt_type
= bb_optimization_type (gimple_bb (mul_stmt
));
3328 if (!direct_internal_fn_supported_p (IFN_FMA
, type
, opt_type
))
3331 /* If the multiplication has zero uses, it is kept around probably because
3332 of -fnon-call-exceptions. Don't optimize it away in that case,
3334 if (has_zero_uses (mul_result
))
3338 = (state
->m_deferring_p
3339 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (type
)),
3340 param_avoid_fma_max_bits
));
3341 bool defer
= check_defer
;
3342 bool seen_negate_p
= false;
3344 /* There is no numerical difference between fused and unfused integer FMAs,
3345 and the assumption below that FMA is as cheap as addition is unlikely
3346 to be true, especially if the multiplication occurs multiple times on
3347 the same chain. E.g., for something like:
3349 (((a * b) + c) >> 1) + (a * b)
3351 we do not want to duplicate the a * b into two additions, not least
3352 because the result is not a natural FMA chain. */
3353 if (ANY_INTEGRAL_TYPE_P (type
)
3354 && !has_single_use (mul_result
))
3357 if (!dbg_cnt (form_fma
))
3360 /* Make sure that the multiplication statement becomes dead after
3361 the transformation, thus that all uses are transformed to FMAs.
3362 This means we assume that an FMA operation has the same cost
3364 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, mul_result
)
3366 tree result
= mul_result
;
3367 bool negate_p
= false;
3369 use_stmt
= USE_STMT (use_p
);
3371 if (is_gimple_debug (use_stmt
))
      /* For now restrict this operation to single basic blocks.  In theory
	 we would want to support sinking the multiplication in an if/else
	 diamond, to form an fma in the then block and sink the
	 multiplication to the else block.  */
3383 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3386 /* A negate on the multiplication leads to FNMA. */
3387 if (is_gimple_assign (use_stmt
)
3388 && gimple_assign_rhs_code (use_stmt
) == NEGATE_EXPR
)
3393 /* If (due to earlier missed optimizations) we have two
3394 negates of the same value, treat them as equivalent
3395 to a single negate with multiple uses. */
3399 result
= gimple_assign_lhs (use_stmt
);
3401 /* Make sure the negate statement becomes dead with this
3402 single transformation. */
3403 if (!single_imm_use (gimple_assign_lhs (use_stmt
),
3404 &use_p
, &neguse_stmt
))
3407 /* Make sure the multiplication isn't also used on that stmt. */
3408 FOR_EACH_PHI_OR_STMT_USE (usep
, neguse_stmt
, iter
, SSA_OP_USE
)
3409 if (USE_FROM_PTR (usep
) == mul_result
)
3413 use_stmt
= neguse_stmt
;
3414 if (gimple_bb (use_stmt
) != gimple_bb (mul_stmt
))
3417 negate_p
= seen_negate_p
= true;
3420 tree cond
, else_value
, ops
[3], len
, bias
;
3422 if (!can_interpret_as_conditional_op_p (use_stmt
, &cond
, &code
, ops
,
3423 &else_value
, &len
, &bias
))
3429 if (ops
[1] == result
)
3430 negate_p
= !negate_p
;
3435 /* FMA can only be formed from PLUS and MINUS. */
	  /* For COND_LEN_* operations, we may have a dummy mask which is
	     the all-true mask.  In that case mul_cond and cond may be
	     different trees, but we still consider them equal.  */
3444 if (mul_cond
&& cond
!= mul_cond
3445 && !(integer_truep (mul_cond
) && integer_truep (cond
)))
3448 if (else_value
== result
)
3451 if (!direct_internal_fn_supported_p (IFN_COND_LEN_FMA
, type
,
3457 poly_int64 mul_value
, value
;
3458 if (poly_int_tree_p (mul_len
, &mul_value
)
3459 && poly_int_tree_p (len
, &value
)
3460 && maybe_ne (mul_value
, value
))
3462 else if (mul_len
!= len
)
3465 if (wi::to_widest (mul_bias
) != wi::to_widest (bias
))
3471 if (mul_cond
&& cond
!= mul_cond
)
3476 if (cond
== result
|| else_value
== result
)
3478 if (!direct_internal_fn_supported_p (IFN_COND_FMA
, type
,
      /* If the subtrahend (OPS[1]) is computed by a MULT_EXPR that
	 we'll visit later, we might be able to get a more profitable
	 match for it then.
	 OTOH, if we don't, a negate / fma pair has likely lower latency
	 than a mult / subtract pair.  */
3489 if (code
== MINUS_EXPR
3492 && !direct_internal_fn_supported_p (IFN_FMS
, type
, opt_type
)
3493 && direct_internal_fn_supported_p (IFN_FNMA
, type
, opt_type
)
3494 && TREE_CODE (ops
[1]) == SSA_NAME
3495 && has_single_use (ops
[1]))
3497 gimple
*stmt2
= SSA_NAME_DEF_STMT (ops
[1]);
3498 if (is_gimple_assign (stmt2
)
3499 && gimple_assign_rhs_code (stmt2
) == MULT_EXPR
)
3503 /* We can't handle a * b + a * b. */
3504 if (ops
[0] == ops
[1])
3506 /* If deferring, make sure we are not looking at an instruction that
3507 wouldn't have existed if we were not. */
3508 if (state
->m_deferring_p
3509 && (state
->m_mul_result_set
.contains (ops
[0])
3510 || state
->m_mul_result_set
.contains (ops
[1])))
3515 tree use_lhs
= gimple_get_lhs (use_stmt
);
3516 if (state
->m_last_result
)
3518 if (ops
[1] == state
->m_last_result
3519 || ops
[0] == state
->m_last_result
)
3526 gcc_checking_assert (!state
->m_initial_phi
);
3528 if (ops
[0] == result
)
3529 phi
= result_of_phi (ops
[1]);
3532 gcc_assert (ops
[1] == result
);
3533 phi
= result_of_phi (ops
[0]);
3538 state
->m_initial_phi
= phi
;
3545 state
->m_last_result
= use_lhs
;
3546 check_defer
= false;
3551 /* While it is possible to validate whether or not the exact form that
3552 we've recognized is available in the backend, the assumption is that
3553 if the deferring logic above did not trigger, the transformation is
3554 never a loss. For instance, suppose the target only has the plain FMA
3555 pattern available. Consider a*b-c -> fma(a,b,-c): we've exchanged
3556 MUL+SUB for FMA+NEG, which is still two operations. Consider
3557 -(a*b)-c -> fma(-a,b,-c): we still have 3 operations, but in the FMA
3558 form the two NEGs are independent and could be run in parallel. */
3563 fma_transformation_info fti
;
3564 fti
.mul_stmt
= mul_stmt
;
3565 fti
.mul_result
= mul_result
;
3568 state
->m_candidates
.safe_push (fti
);
3569 state
->m_mul_result_set
.add (mul_result
);
3571 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3573 fprintf (dump_file
, "Deferred generating FMA for multiplication ");
3574 print_gimple_stmt (dump_file
, mul_stmt
, 0, TDF_NONE
);
3575 fprintf (dump_file
, "\n");
3582 if (state
->m_deferring_p
)
3583 cancel_fma_deferring (state
);
3584 convert_mult_to_fma_1 (mul_result
, op1
, op2
);
3590 /* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
3591 a check for non-zero like:
3592 _1 = x_4(D) * y_5(D);
3595 goto <bb 3>; [50.00%]
3597 goto <bb 4>; [50.00%]
3599 <bb 3> [local count: 536870913]:
3604 <bb 4> [local count: 1073741824]:
3605 # iftmp.0_3 = PHI <_10(3), 0(2)>
3606 then in addition to using .MUL_OVERFLOW (x_4(D), y_5(D)) we can also
3607 optimize the x_4(D) != 0 condition to 1. */
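/* Illustrative, source-level example (not part of GCC itself) of code that
   produces the GIMPLE shown above:

     int mul_overflows (unsigned x, unsigned y)
     {
       unsigned r = x * y;
       return x != 0 && r / x != y;   // overflow test guarded by x != 0
     }

   Once the division-based check is rewritten to use .MUL_OVERFLOW (x, y),
   the guarding x != 0 test is redundant and can be folded to true, which is
   what this helper arranges.  */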
3610 maybe_optimize_guarding_check (vec
<gimple
*> &mul_stmts
, gimple
*cond_stmt
,
3611 gimple
*div_stmt
, bool *cfg_changed
)
3613 basic_block bb
= gimple_bb (cond_stmt
);
3614 if (gimple_bb (div_stmt
) != bb
|| !single_pred_p (bb
))
3616 edge pred_edge
= single_pred_edge (bb
);
3617 basic_block pred_bb
= pred_edge
->src
;
3618 if (EDGE_COUNT (pred_bb
->succs
) != 2)
3620 edge other_edge
= EDGE_SUCC (pred_bb
, EDGE_SUCC (pred_bb
, 0) == pred_edge
);
3621 edge other_succ_edge
= NULL
;
3622 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3624 if (EDGE_COUNT (bb
->succs
) != 2)
3626 other_succ_edge
= EDGE_SUCC (bb
, 0);
3627 if (gimple_cond_code (cond_stmt
) == NE_EXPR
)
3629 if (other_succ_edge
->flags
& EDGE_TRUE_VALUE
)
3630 other_succ_edge
= EDGE_SUCC (bb
, 1);
3632 else if (other_succ_edge
->flags
& EDGE_FALSE_VALUE
)
3633 other_succ_edge
= EDGE_SUCC (bb
, 0);
3634 if (other_edge
->dest
!= other_succ_edge
->dest
)
3637 else if (!single_succ_p (bb
) || other_edge
->dest
!= single_succ (bb
))
3639 gcond
*zero_cond
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (pred_bb
));
3640 if (zero_cond
== NULL
3641 || (gimple_cond_code (zero_cond
)
3642 != ((pred_edge
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
))
3643 || !integer_zerop (gimple_cond_rhs (zero_cond
)))
3645 tree zero_cond_lhs
= gimple_cond_lhs (zero_cond
);
3646 if (TREE_CODE (zero_cond_lhs
) != SSA_NAME
)
3648 if (gimple_assign_rhs2 (div_stmt
) != zero_cond_lhs
)
3650 /* Allow the divisor to be result of a same precision cast
3651 from zero_cond_lhs. */
3652 tree rhs2
= gimple_assign_rhs2 (div_stmt
);
3653 if (TREE_CODE (rhs2
) != SSA_NAME
)
3655 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3656 if (!gimple_assign_cast_p (g
)
3657 || gimple_assign_rhs1 (g
) != gimple_cond_lhs (zero_cond
)
3658 || !INTEGRAL_TYPE_P (TREE_TYPE (zero_cond_lhs
))
3659 || (TYPE_PRECISION (TREE_TYPE (zero_cond_lhs
))
3660 != TYPE_PRECISION (TREE_TYPE (rhs2
))))
3663 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
3664 mul_stmts
.quick_push (div_stmt
);
3665 if (is_gimple_debug (gsi_stmt (gsi
)))
3666 gsi_next_nondebug (&gsi
);
3667 unsigned cast_count
= 0;
3668 while (gsi_stmt (gsi
) != cond_stmt
)
      /* If the original mul_stmt has a single use, allow it in the same bb;
	 we are then looking just at __builtin_mul_overflow_p.
	 Though, in that case the original mul_stmt will be replaced
	 by .MUL_OVERFLOW, REALPART_EXPR and IMAGPART_EXPR stmts.  */
3677 FOR_EACH_VEC_ELT (mul_stmts
, i
, mul_stmt
)
3679 if (gsi_stmt (gsi
) == mul_stmt
)
3685 if (!ok
&& gimple_assign_cast_p (gsi_stmt (gsi
)) && ++cast_count
< 4)
3689 gsi_next_nondebug (&gsi
);
3691 if (gimple_code (cond_stmt
) == GIMPLE_COND
)
3693 basic_block succ_bb
= other_edge
->dest
;
3694 for (gphi_iterator gpi
= gsi_start_phis (succ_bb
); !gsi_end_p (gpi
);
3697 gphi
*phi
= gpi
.phi ();
3698 tree v1
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3699 tree v2
= gimple_phi_arg_def (phi
, other_succ_edge
->dest_idx
);
3700 if (!operand_equal_p (v1
, v2
, 0))
3706 tree lhs
= gimple_assign_lhs (cond_stmt
);
3707 if (!lhs
|| !INTEGRAL_TYPE_P (TREE_TYPE (lhs
)))
3709 gsi_next_nondebug (&gsi
);
3710 if (!gsi_end_p (gsi
))
3712 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3714 gimple
*cast_stmt
= gsi_stmt (gsi
);
3715 if (!gimple_assign_cast_p (cast_stmt
))
3717 tree new_lhs
= gimple_assign_lhs (cast_stmt
);
3718 gsi_next_nondebug (&gsi
);
3719 if (!gsi_end_p (gsi
)
3721 || !INTEGRAL_TYPE_P (TREE_TYPE (new_lhs
))
3722 || TYPE_PRECISION (TREE_TYPE (new_lhs
)) <= 1)
3726 edge succ_edge
= single_succ_edge (bb
);
3727 basic_block succ_bb
= succ_edge
->dest
;
3728 gsi
= gsi_start_phis (succ_bb
);
3729 if (gsi_end_p (gsi
))
3731 gphi
*phi
= as_a
<gphi
*> (gsi_stmt (gsi
));
3733 if (!gsi_end_p (gsi
))
3735 if (gimple_phi_arg_def (phi
, succ_edge
->dest_idx
) != lhs
)
3737 tree other_val
= gimple_phi_arg_def (phi
, other_edge
->dest_idx
);
3738 if (gimple_assign_rhs_code (cond_stmt
) == COND_EXPR
)
3740 tree cond
= gimple_assign_rhs1 (cond_stmt
);
3741 if (TREE_CODE (cond
) == NE_EXPR
)
3743 if (!operand_equal_p (other_val
,
3744 gimple_assign_rhs3 (cond_stmt
), 0))
3747 else if (!operand_equal_p (other_val
,
3748 gimple_assign_rhs2 (cond_stmt
), 0))
3751 else if (gimple_assign_rhs_code (cond_stmt
) == NE_EXPR
)
3753 if (!integer_zerop (other_val
))
3756 else if (!integer_onep (other_val
))
3759 if (pred_edge
->flags
& EDGE_TRUE_VALUE
)
3760 gimple_cond_make_true (zero_cond
);
3762 gimple_cond_make_false (zero_cond
);
3763 update_stmt (zero_cond
);
3764 *cfg_changed
= true;
/* Helper function for arith_overflow_check_p.  Return true
   if VAL1 is equal to VAL2 cast to corresponding integral type
   with other signedness or vice versa.  */

static bool
arith_cast_equal_p (tree val1, tree val2)
{
  if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
    return wi::eq_p (wi::to_wide (val1), wi::to_wide (val2));
  else if (TREE_CODE (val1) != SSA_NAME || TREE_CODE (val2) != SSA_NAME)
    return false;
  if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val1))
      && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val1)) == val2)
    return true;
  if (gimple_assign_cast_p (SSA_NAME_DEF_STMT (val2))
      && gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val2)) == val1)
    return true;
  return false;
}
/* Helper function of match_arith_overflow.  Return 1
   if USE_STMT is an unsigned overflow check ovf != 0 for
   STMT, -1 if USE_STMT is an unsigned overflow check ovf == 0
   for STMT, and 0 otherwise.  */
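/* Illustrative, source-level examples (not part of GCC itself) of the
   checks this helper classifies, for unsigned a and b:

     unsigned r = a + b;  if (r < a)  ...              // addition overflowed
     unsigned r = a - b;  if (r > a)  ...              // subtraction wrapped
     unsigned r = a * b;  if (a && r / a != b)  ...    // multiplication overflowed

   The sign of the return value records whether the condition tests for the
   overflowing case or for its complement.  */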
3793 arith_overflow_check_p (gimple
*stmt
, gimple
*cast_stmt
, gimple
*&use_stmt
,
3794 tree maxval
, tree
*other
)
3796 enum tree_code ccode
= ERROR_MARK
;
3797 tree crhs1
= NULL_TREE
, crhs2
= NULL_TREE
;
3798 enum tree_code code
= gimple_assign_rhs_code (stmt
);
3799 tree lhs
= gimple_assign_lhs (cast_stmt
? cast_stmt
: stmt
);
3800 tree rhs1
= gimple_assign_rhs1 (stmt
);
3801 tree rhs2
= gimple_assign_rhs2 (stmt
);
3802 tree multop
= NULL_TREE
, divlhs
= NULL_TREE
;
3803 gimple
*cur_use_stmt
= use_stmt
;
3805 if (code
== MULT_EXPR
)
3807 if (!is_gimple_assign (use_stmt
))
3809 if (gimple_assign_rhs_code (use_stmt
) != TRUNC_DIV_EXPR
)
3811 if (gimple_assign_rhs1 (use_stmt
) != lhs
)
3815 if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs1
))
3817 else if (arith_cast_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
))
3822 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
3824 else if (operand_equal_p (gimple_assign_rhs2 (use_stmt
), rhs2
, 0))
3828 if (stmt_ends_bb_p (use_stmt
))
3830 divlhs
= gimple_assign_lhs (use_stmt
);
3834 if (!single_imm_use (divlhs
, &use
, &cur_use_stmt
))
3836 if (cast_stmt
&& gimple_assign_cast_p (cur_use_stmt
))
3838 tree cast_lhs
= gimple_assign_lhs (cur_use_stmt
);
3839 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs
))
3840 && TYPE_UNSIGNED (TREE_TYPE (cast_lhs
))
3841 && (TYPE_PRECISION (TREE_TYPE (cast_lhs
))
3842 == TYPE_PRECISION (TREE_TYPE (divlhs
)))
3843 && single_imm_use (cast_lhs
, &use
, &cur_use_stmt
))
3852 if (gimple_code (cur_use_stmt
) == GIMPLE_COND
)
3854 ccode
= gimple_cond_code (cur_use_stmt
);
3855 crhs1
= gimple_cond_lhs (cur_use_stmt
);
3856 crhs2
= gimple_cond_rhs (cur_use_stmt
);
3858 else if (is_gimple_assign (cur_use_stmt
))
3860 if (gimple_assign_rhs_class (cur_use_stmt
) == GIMPLE_BINARY_RHS
)
3862 ccode
= gimple_assign_rhs_code (cur_use_stmt
);
3863 crhs1
= gimple_assign_rhs1 (cur_use_stmt
);
3864 crhs2
= gimple_assign_rhs2 (cur_use_stmt
);
3866 else if (gimple_assign_rhs_code (cur_use_stmt
) == COND_EXPR
)
3868 tree cond
= gimple_assign_rhs1 (cur_use_stmt
);
3869 if (COMPARISON_CLASS_P (cond
))
3871 ccode
= TREE_CODE (cond
);
3872 crhs1
= TREE_OPERAND (cond
, 0);
3873 crhs2
= TREE_OPERAND (cond
, 1);
3885 && ccode
== RSHIFT_EXPR
3887 && TREE_CODE (crhs2
) == INTEGER_CST
3888 && wi::to_widest (crhs2
) == TYPE_PRECISION (TREE_TYPE (maxval
)))
3890 tree shiftlhs
= gimple_assign_lhs (use_stmt
);
3894 if (!single_imm_use (shiftlhs
, &use
, &cur_use_stmt
))
3896 if (gimple_code (cur_use_stmt
) == GIMPLE_COND
)
3898 ccode
= gimple_cond_code (cur_use_stmt
);
3899 crhs1
= gimple_cond_lhs (cur_use_stmt
);
3900 crhs2
= gimple_cond_rhs (cur_use_stmt
);
3902 else if (is_gimple_assign (cur_use_stmt
))
3904 if (gimple_assign_rhs_class (cur_use_stmt
) == GIMPLE_BINARY_RHS
)
3906 ccode
= gimple_assign_rhs_code (cur_use_stmt
);
3907 crhs1
= gimple_assign_rhs1 (cur_use_stmt
);
3908 crhs2
= gimple_assign_rhs2 (cur_use_stmt
);
3910 else if (gimple_assign_rhs_code (cur_use_stmt
) == COND_EXPR
)
3912 tree cond
= gimple_assign_rhs1 (cur_use_stmt
);
3913 if (COMPARISON_CLASS_P (cond
))
3915 ccode
= TREE_CODE (cond
);
3916 crhs1
= TREE_OPERAND (cond
, 0);
3917 crhs2
= TREE_OPERAND (cond
, 1);
3924 enum tree_code sc
= gimple_assign_rhs_code (cur_use_stmt
);
3925 tree castlhs
= gimple_assign_lhs (cur_use_stmt
);
3926 if (!CONVERT_EXPR_CODE_P (sc
)
3928 || !INTEGRAL_TYPE_P (TREE_TYPE (castlhs
))
3929 || (TYPE_PRECISION (TREE_TYPE (castlhs
))
3930 > TYPE_PRECISION (TREE_TYPE (maxval
))))
3937 if ((ccode
!= EQ_EXPR
&& ccode
!= NE_EXPR
)
3938 || crhs1
!= shiftlhs
3939 || !integer_zerop (crhs2
))
3944 if (TREE_CODE_CLASS (ccode
) != tcc_comparison
)
3953 /* r = a + b; r > maxval or r <= maxval */
3955 && TREE_CODE (crhs2
) == INTEGER_CST
3956 && tree_int_cst_equal (crhs2
, maxval
))
3957 return ccode
== GT_EXPR
? 1 : -1;
3960 /* r = a - b; r > a or r <= a
3961 r = a + b; a > r or a <= r or b > r or b <= r. */
3962 if ((code
== MINUS_EXPR
&& crhs1
== lhs
&& crhs2
== rhs1
)
3963 || (code
== PLUS_EXPR
&& (crhs1
== rhs1
|| crhs1
== rhs2
)
3965 return ccode
== GT_EXPR
? 1 : -1;
3966 /* r = ~a; b > r or b <= r. */
3967 if (code
== BIT_NOT_EXPR
&& crhs2
== lhs
)
3971 return ccode
== GT_EXPR
? 1 : -1;
3978 /* r = a - b; a < r or a >= r
3979 r = a + b; r < a or r >= a or r < b or r >= b. */
3980 if ((code
== MINUS_EXPR
&& crhs1
== rhs1
&& crhs2
== lhs
)
3981 || (code
== PLUS_EXPR
&& crhs1
== lhs
3982 && (crhs2
== rhs1
|| crhs2
== rhs2
)))
3983 return ccode
== LT_EXPR
? 1 : -1;
3984 /* r = ~a; r < b or r >= b. */
3985 if (code
== BIT_NOT_EXPR
&& crhs1
== lhs
)
3989 return ccode
== LT_EXPR
? 1 : -1;
3994 /* r = a * b; _1 = r / a; _1 == b
3995 r = a * b; _1 = r / b; _1 == a
3996 r = a * b; _1 = r / a; _1 != b
3997 r = a * b; _1 = r / b; _1 != a. */
3998 if (code
== MULT_EXPR
)
4002 if ((crhs1
== divlhs
&& arith_cast_equal_p (crhs2
, multop
))
4003 || (crhs2
== divlhs
&& arith_cast_equal_p (crhs1
, multop
)))
4005 use_stmt
= cur_use_stmt
;
4006 return ccode
== NE_EXPR
? 1 : -1;
4009 else if ((crhs1
== divlhs
&& operand_equal_p (crhs2
, multop
, 0))
4010 || (crhs2
== divlhs
&& crhs1
== multop
))
4012 use_stmt
= cur_use_stmt
;
4013 return ccode
== NE_EXPR
? 1 : -1;
extern bool gimple_unsigned_integer_sat_add (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_sub (tree, tree*, tree (*)(tree));
extern bool gimple_unsigned_integer_sat_trunc (tree, tree*, tree (*)(tree));

extern bool gimple_signed_integer_sat_add (tree, tree*, tree (*)(tree));
extern bool gimple_signed_integer_sat_sub (tree, tree*, tree (*)(tree));
extern bool gimple_signed_integer_sat_trunc (tree, tree*, tree (*)(tree));
static void
build_saturation_binary_arith_call_and_replace (gimple_stmt_iterator *gsi,
						internal_fn fn, tree lhs,
						tree op_0, tree op_1)
{
  if (direct_internal_fn_supported_p (fn, TREE_TYPE (lhs), OPTIMIZE_FOR_BOTH))
    {
      gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
      gimple_call_set_lhs (call, lhs);
      gsi_replace (gsi, call, /* update_eh_info */ true);
    }
}

static bool
build_saturation_binary_arith_call_and_insert (gimple_stmt_iterator *gsi,
					       internal_fn fn, tree lhs,
					       tree op_0, tree op_1)
{
  if (!direct_internal_fn_supported_p (fn, TREE_TYPE (op_0), OPTIMIZE_FOR_BOTH))
    return false;

  gcall *call = gimple_build_call_internal (fn, 2, op_0, op_1);
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (gsi, call, GSI_SAME_STMT);
  return true;
}
/*
 * Try to match saturation unsigned add with assign.
 *   ...
 *   _9 = (long unsigned int) _8;
 *   ...
 *   _12 = .SAT_ADD (_4, _6);  */
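/* Illustrative, source-level example (not part of GCC itself) of a
   branchless unsigned saturating add that matches this pattern:

     #include <stdint.h>

     uint64_t sat_add (uint64_t x, uint64_t y)
     {
       uint64_t sum = x + y;
       return sum | -(uint64_t) (sum < x);   // all-ones when the add wrapped
     }
*/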
4070 match_unsigned_saturation_add (gimple_stmt_iterator
*gsi
, gassign
*stmt
)
4073 tree lhs
= gimple_assign_lhs (stmt
);
4075 if (gimple_unsigned_integer_sat_add (lhs
, ops
, NULL
))
4076 build_saturation_binary_arith_call_and_replace (gsi
, IFN_SAT_ADD
, lhs
,
4081 * Try to match saturation add with PHI.
4082 * For unsigned integer:
4084 * _1 = x_3(D) + y_4(D);
4086 * goto <bb 3>; [INV]
4088 * goto <bb 4>; [INV]
4093 * # _2 = PHI <255(2), _1(3)>
4095 * <bb 4> [local count: 1073741824]:
4096 * _2 = .SAT_ADD (x_4(D), y_5(D));
4098 * For signed integer:
4099 * x.0_1 = (long unsigned int) x_7(D);
4100 * y.1_2 = (long unsigned int) y_8(D);
4101 * _3 = x.0_1 + y.1_2;
4102 * sum_9 = (int64_t) _3;
4103 * _4 = x_7(D) ^ y_8(D);
4104 * _5 = x_7(D) ^ sum_9;
4108 * goto <bb 3>; [41.00%]
4110 * goto <bb 4>; [59.00%]
4112 * _12 = (long int) _11;
4114 * _14 = _13 ^ 9223372036854775807;
4115 * # _6 = PHI <_14(3), sum_9(2)>
4117 * _6 = .SAT_ADD (x_5(D), y_6(D)); [tail call] */
4120 match_saturation_add (gimple_stmt_iterator
*gsi
, gphi
*phi
)
4122 if (gimple_phi_num_args (phi
) != 2)
4126 tree phi_result
= gimple_phi_result (phi
);
4128 if (!gimple_unsigned_integer_sat_add (phi_result
, ops
, NULL
)
4129 && !gimple_signed_integer_sat_add (phi_result
, ops
, NULL
))
4132 if (!TYPE_UNSIGNED (TREE_TYPE (ops
[0])) && TREE_CODE (ops
[1]) == INTEGER_CST
)
4133 ops
[1] = fold_convert (TREE_TYPE (ops
[0]), ops
[1]);
4135 return build_saturation_binary_arith_call_and_insert (gsi
, IFN_SAT_ADD
,
4141 * Try to match saturation unsigned sub.
4146 * _6 = .SAT_SUB (_4, _5); */
4149 match_unsigned_saturation_sub (gimple_stmt_iterator
*gsi
, gassign
*stmt
)
4152 tree lhs
= gimple_assign_lhs (stmt
);
4154 if (gimple_unsigned_integer_sat_sub (lhs
, ops
, NULL
))
4155 build_saturation_binary_arith_call_and_replace (gsi
, IFN_SAT_SUB
, lhs
,
/*
 * Try to match saturation unsigned sub.
 *   <bb 2> [local count: 1073741824]:
 *   if (x_2(D) > y_3(D))
 *     goto <bb 3>; [50.00%]
 *   else
 *     goto <bb 4>; [50.00%]
 *
 *   <bb 3> [local count: 536870912]:
 *   _4 = x_2(D) - y_3(D);
 *
 *   <bb 4> [local count: 1073741824]:
 *   # _1 = PHI <0(2), _4(3)>
 *   =>
 *   <bb 4> [local count: 1073741824]:
 *   _1 = .SAT_SUB (x_2(D), y_3(D));  */
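/* Illustrative, source-level example (not part of GCC itself) corresponding
   to the PHI form above:

     #include <stdint.h>

     uint32_t sat_sub (uint32_t x, uint32_t y)
     {
       return x > y ? x - y : 0;   // becomes the PHI <0, x - y> shown above
     }
*/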
4176 match_saturation_sub (gimple_stmt_iterator
*gsi
, gphi
*phi
)
4178 if (gimple_phi_num_args (phi
) != 2)
4182 tree phi_result
= gimple_phi_result (phi
);
4184 if (!gimple_unsigned_integer_sat_sub (phi_result
, ops
, NULL
)
4185 && !gimple_signed_integer_sat_sub (phi_result
, ops
, NULL
))
4188 return build_saturation_binary_arith_call_and_insert (gsi
, IFN_SAT_SUB
,
4194 * Try to match saturation unsigned sub.
4197 * overflow_5 = x_4(D) > 255;
4198 * _1 = (unsigned char) x_4(D);
4199 * _2 = (unsigned char) overflow_5;
4203 * _6 = .SAT_TRUNC (x_4(D));
4206 match_unsigned_saturation_trunc (gimple_stmt_iterator
*gsi
, gassign
*stmt
)
4209 tree lhs
= gimple_assign_lhs (stmt
);
4210 tree type
= TREE_TYPE (lhs
);
4212 if (gimple_unsigned_integer_sat_trunc (lhs
, ops
, NULL
)
4213 && direct_internal_fn_supported_p (IFN_SAT_TRUNC
,
4214 tree_pair (type
, TREE_TYPE (ops
[0])),
4217 gcall
*call
= gimple_build_call_internal (IFN_SAT_TRUNC
, 1, ops
[0]);
4218 gimple_call_set_lhs (call
, lhs
);
4219 gsi_replace (gsi
, call
, /* update_eh_info */ true);
/*
 * Try to match saturation truncate.
 *   x.0_1 = (unsigned long) x_4(D);
 *   _2 = x.0_1 + 2147483648;
 *   if (_2 > 4294967295)
 *     goto <bb 4>; [50.00%]
 *   else
 *     goto <bb 3>; [50.00%]
 *
 *   ;; basic block 3, loop depth 0
 *   trunc_5 = (int32_t) x_4(D);
 *   goto <bb 5>; [100.00%]
 *
 *   ;; basic block 4, loop depth 0
 *   _10 = _9 ^ 2147483647;
 *
 *   ;; basic block 5, loop depth 0
 *   # _3 = PHI <trunc_5(3), _10(4)>
 *   =>
 *   _6 = .SAT_TRUNC (x_4(D));  */
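/* Illustrative, source-level example (not part of GCC itself) corresponding
   to the truncation pattern above:

     #include <stdint.h>

     int32_t sat_trunc (int64_t x)
     {
       if (x > INT32_MAX)
	 return INT32_MAX;
       if (x < INT32_MIN)
	 return INT32_MIN;
       return (int32_t) x;
     }

   The single unsigned comparison in the GIMPLE above (x + 2147483648 >
   4294967295) tests both bounds at once.  */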
static bool
match_saturation_trunc (gimple_stmt_iterator *gsi, gphi *phi)
{
  if (gimple_phi_num_args (phi) != 2)
    return false;

  tree ops[1];
  tree phi_result = gimple_phi_result (phi);
  tree type = TREE_TYPE (phi_result);

  if (!gimple_unsigned_integer_sat_trunc (phi_result, ops, NULL)
      && !gimple_signed_integer_sat_trunc (phi_result, ops, NULL))
    return false;

  if (!direct_internal_fn_supported_p (IFN_SAT_TRUNC,
                                       tree_pair (type, TREE_TYPE (ops[0])),
                                       OPTIMIZE_FOR_BOTH))
    return false;

  gcall *call = gimple_build_call_internal (IFN_SAT_TRUNC, 1, ops[0]);
  gimple_call_set_lhs (call, phi_result);
  gsi_insert_before (gsi, call, GSI_SAME_STMT);

  return true;
}
/* Recognize for unsigned x
   x = y - z;
   if (x > y)
   where there are other uses of x and replace it with
   _7 = .SUB_OVERFLOW (y, z);
   x = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)
   and similarly for addition.

   Also recognize:
   yc = (type) y;
   zc = (type) z;
   x = yc + zc;
   if (x > max)
   where y and z have unsigned types with maximum max
   and there are other uses of x and all of those cast x
   back to that unsigned type and again replace it with
   _7 = .ADD_OVERFLOW (y, z);
   _9 = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   if (_8)
   and replace (utype) x with _9.
   Or with x >> popcount (max) instead of x > max.

   Also recognize:
   x = ~z;
   if (y > x)
   and replace it with
   _7 = .ADD_OVERFLOW (y, z);
   _8 = IMAGPART_EXPR <_7>;
   if (_8)

   And also recognize:
   z = x * y;
   if (x != 0)
     goto <bb 3>; [50.00%]
   else
     goto <bb 4>; [50.00%]

   <bb 3> [local count: 536870913]:
   ...

   <bb 4> [local count: 1073741824]:
   # iftmp.0_3 = PHI <_10(3), 0(2)>
   and replace it with
   _7 = .MUL_OVERFLOW (x, y);
   z = REALPART_EXPR <_7>;
   _8 = IMAGPART_EXPR <_7>;
   _9 = _8 != 0;
   iftmp.0_3 = (int) _9;  */
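/* Illustrative (hypothetical) user code producing the addition variant of
   the first shape above:

     unsigned int
     add_check (unsigned int y, unsigned int z, int *overflowed)
     {
       unsigned int x = y + z;
       *overflowed = x < y;
       return x;
     }

   After the transform both the sum and the overflow flag come from a single
   .ADD_OVERFLOW call via REALPART_EXPR and IMAGPART_EXPR.  */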
4338 match_arith_overflow (gimple_stmt_iterator
*gsi
, gimple
*stmt
,
4339 enum tree_code code
, bool *cfg_changed
)
4341 tree lhs
= gimple_assign_lhs (stmt
);
4342 tree type
= TREE_TYPE (lhs
);
4343 use_operand_p use_p
;
4344 imm_use_iterator iter
;
4345 bool use_seen
= false;
4346 bool ovf_use_seen
= false;
4348 gimple
*add_stmt
= NULL
;
4349 bool add_first
= false;
4350 gimple
*cond_stmt
= NULL
;
4351 gimple
*cast_stmt
= NULL
;
4352 tree cast_lhs
= NULL_TREE
;
4354 gcc_checking_assert (code
== PLUS_EXPR
4355 || code
== MINUS_EXPR
4356 || code
== MULT_EXPR
4357 || code
== BIT_NOT_EXPR
);
4358 if (!INTEGRAL_TYPE_P (type
)
4359 || !TYPE_UNSIGNED (type
)
4360 || has_zero_uses (lhs
)
4361 || (code
!= PLUS_EXPR
4362 && code
!= MULT_EXPR
4363 && optab_handler (code
== MINUS_EXPR
? usubv4_optab
: uaddv4_optab
,
4364 TYPE_MODE (type
)) == CODE_FOR_nothing
))
4367 tree rhs1
= gimple_assign_rhs1 (stmt
);
4368 tree rhs2
= gimple_assign_rhs2 (stmt
);
4369 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
4371 use_stmt
= USE_STMT (use_p
);
4372 if (is_gimple_debug (use_stmt
))
4375 tree other
= NULL_TREE
;
4376 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, NULL_TREE
, &other
))
4378 if (code
== BIT_NOT_EXPR
)
4381 if (TREE_CODE (other
) != SSA_NAME
)
4387 cond_stmt
= use_stmt
;
4389 ovf_use_seen
= true;
4394 if (code
== MULT_EXPR
4395 && cast_stmt
== NULL
4396 && gimple_assign_cast_p (use_stmt
))
4398 cast_lhs
= gimple_assign_lhs (use_stmt
);
4399 if (INTEGRAL_TYPE_P (TREE_TYPE (cast_lhs
))
4400 && !TYPE_UNSIGNED (TREE_TYPE (cast_lhs
))
4401 && (TYPE_PRECISION (TREE_TYPE (cast_lhs
))
4402 == TYPE_PRECISION (TREE_TYPE (lhs
))))
4403 cast_stmt
= use_stmt
;
4405 cast_lhs
= NULL_TREE
;
4408 if (ovf_use_seen
&& use_seen
)
4413 && code
== MULT_EXPR
4416 if (TREE_CODE (rhs1
) != SSA_NAME
4417 || (TREE_CODE (rhs2
) != SSA_NAME
&& TREE_CODE (rhs2
) != INTEGER_CST
))
4419 FOR_EACH_IMM_USE_FAST (use_p
, iter
, cast_lhs
)
4421 use_stmt
= USE_STMT (use_p
);
4422 if (is_gimple_debug (use_stmt
))
4425 if (arith_overflow_check_p (stmt
, cast_stmt
, use_stmt
,
4427 ovf_use_seen
= true;
4433 cast_lhs
= NULL_TREE
;
4436 tree maxval
= NULL_TREE
;
4438 || (code
!= MULT_EXPR
&& (code
== BIT_NOT_EXPR
? use_seen
: !use_seen
))
4439 || (code
== PLUS_EXPR
4440 && optab_handler (uaddv4_optab
,
4441 TYPE_MODE (type
)) == CODE_FOR_nothing
)
4442 || (code
== MULT_EXPR
4443 && optab_handler (cast_stmt
? mulv4_optab
: umulv4_optab
,
4444 TYPE_MODE (type
)) == CODE_FOR_nothing
4447 || !can_mult_highpart_p (TYPE_MODE (type
), true))))
4449 if (code
!= PLUS_EXPR
)
4451 if (TREE_CODE (rhs1
) != SSA_NAME
4452 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs1
)))
4454 rhs1
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs1
));
4455 tree type1
= TREE_TYPE (rhs1
);
4456 if (!INTEGRAL_TYPE_P (type1
)
4457 || !TYPE_UNSIGNED (type1
)
4458 || TYPE_PRECISION (type1
) >= TYPE_PRECISION (type
)
4459 || (TYPE_PRECISION (type1
)
4460 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type1
))))
4462 if (TREE_CODE (rhs2
) == INTEGER_CST
)
4464 if (wi::ne_p (wi::rshift (wi::to_wide (rhs2
),
4465 TYPE_PRECISION (type1
),
4468 rhs2
= fold_convert (type1
, rhs2
);
4472 if (TREE_CODE (rhs2
) != SSA_NAME
4473 || !gimple_assign_cast_p (SSA_NAME_DEF_STMT (rhs2
)))
4475 rhs2
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (rhs2
));
4476 tree type2
= TREE_TYPE (rhs2
);
4477 if (!INTEGRAL_TYPE_P (type2
)
4478 || !TYPE_UNSIGNED (type2
)
4479 || TYPE_PRECISION (type2
) >= TYPE_PRECISION (type
)
4480 || (TYPE_PRECISION (type2
)
4481 != GET_MODE_BITSIZE (SCALAR_INT_TYPE_MODE (type2
))))
4484 if (TYPE_PRECISION (type1
) >= TYPE_PRECISION (TREE_TYPE (rhs2
)))
4487 type
= TREE_TYPE (rhs2
);
4489 if (TREE_CODE (type
) != INTEGER_TYPE
4490 || optab_handler (uaddv4_optab
,
4491 TYPE_MODE (type
)) == CODE_FOR_nothing
)
4494 maxval
= wide_int_to_tree (type
, wi::max_value (TYPE_PRECISION (type
),
4496 ovf_use_seen
= false;
4498 basic_block use_bb
= NULL
;
4499 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
4501 use_stmt
= USE_STMT (use_p
);
4502 if (is_gimple_debug (use_stmt
))
4505 if (arith_overflow_check_p (stmt
, NULL
, use_stmt
, maxval
, NULL
))
4507 ovf_use_seen
= true;
4508 use_bb
= gimple_bb (use_stmt
);
4512 if (!gimple_assign_cast_p (use_stmt
)
4513 || gimple_assign_rhs_code (use_stmt
) == VIEW_CONVERT_EXPR
)
4515 tree use_lhs
= gimple_assign_lhs (use_stmt
);
4516 if (!INTEGRAL_TYPE_P (TREE_TYPE (use_lhs
))
4517 || (TYPE_PRECISION (TREE_TYPE (use_lhs
))
4518 > TYPE_PRECISION (type
)))
4525 if (!useless_type_conversion_p (type
, TREE_TYPE (rhs1
)))
4529 tree new_rhs1
= make_ssa_name (type
);
4530 gimple
*g
= gimple_build_assign (new_rhs1
, NOP_EXPR
, rhs1
);
4531 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4534 else if (!useless_type_conversion_p (type
, TREE_TYPE (rhs2
)))
4538 tree new_rhs2
= make_ssa_name (type
);
4539 gimple
*g
= gimple_build_assign (new_rhs2
, NOP_EXPR
, rhs2
);
4540 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4545 /* If there are no uses of the wider addition, check if
4546 forwprop has not created a narrower addition.
4547 Require it to be in the same bb as the overflow check. */
4548 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs1
)
4550 use_stmt
= USE_STMT (use_p
);
4551 if (is_gimple_debug (use_stmt
))
4554 if (use_stmt
== stmt
)
4557 if (!is_gimple_assign (use_stmt
)
4558 || gimple_bb (use_stmt
) != use_bb
4559 || gimple_assign_rhs_code (use_stmt
) != PLUS_EXPR
)
4562 if (gimple_assign_rhs1 (use_stmt
) == rhs1
)
4564 if (!operand_equal_p (gimple_assign_rhs2 (use_stmt
),
4568 else if (gimple_assign_rhs2 (use_stmt
) == rhs1
)
4570 if (gimple_assign_rhs1 (use_stmt
) != rhs2
)
4576 add_stmt
= use_stmt
;
4579 if (add_stmt
== NULL
)
4582 /* If stmt and add_stmt are in the same bb, we need to find out
4583 which one is earlier. If they are in different bbs, we've
4584 checked add_stmt is in the same bb as one of the uses of the
4585 stmt lhs, so stmt needs to dominate add_stmt too. */
4586 if (gimple_bb (stmt
) == gimple_bb (add_stmt
))
4588 gimple_stmt_iterator gsif
= *gsi
;
4589 gimple_stmt_iterator gsib
= *gsi
;
4591 /* Search both forward and backward from stmt and have a small
4593 for (i
= 0; i
< 128; i
++)
4595 if (!gsi_end_p (gsib
))
4597 gsi_prev_nondebug (&gsib
);
4598 if (gsi_stmt (gsib
) == add_stmt
)
4604 else if (gsi_end_p (gsif
))
4606 if (!gsi_end_p (gsif
))
4608 gsi_next_nondebug (&gsif
);
4609 if (gsi_stmt (gsif
) == add_stmt
)
4616 *gsi
= gsi_for_stmt (add_stmt
);
4621 if (code
== BIT_NOT_EXPR
)
4622 *gsi
= gsi_for_stmt (cond_stmt
);
4624 auto_vec
<gimple
*, 8> mul_stmts
;
4625 if (code
== MULT_EXPR
&& cast_stmt
)
4627 type
= TREE_TYPE (cast_lhs
);
4628 gimple
*g
= SSA_NAME_DEF_STMT (rhs1
);
4629 if (gimple_assign_cast_p (g
)
4630 && useless_type_conversion_p (type
,
4631 TREE_TYPE (gimple_assign_rhs1 (g
)))
4632 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
4633 rhs1
= gimple_assign_rhs1 (g
);
4636 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, rhs1
);
4637 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4638 rhs1
= gimple_assign_lhs (g
);
4639 mul_stmts
.quick_push (g
);
4641 if (TREE_CODE (rhs2
) == INTEGER_CST
)
4642 rhs2
= fold_convert (type
, rhs2
);
4645 g
= SSA_NAME_DEF_STMT (rhs2
);
4646 if (gimple_assign_cast_p (g
)
4647 && useless_type_conversion_p (type
,
4648 TREE_TYPE (gimple_assign_rhs1 (g
)))
4649 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g
)))
4650 rhs2
= gimple_assign_rhs1 (g
);
4653 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, rhs2
);
4654 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4655 rhs2
= gimple_assign_lhs (g
);
4656 mul_stmts
.quick_push (g
);
4660 tree ctype
= build_complex_type (type
);
4661 gcall
*g
= gimple_build_call_internal (code
== MULT_EXPR
4663 : code
!= MINUS_EXPR
4664 ? IFN_ADD_OVERFLOW
: IFN_SUB_OVERFLOW
,
4666 tree ctmp
= make_ssa_name (ctype
);
4667 gimple_call_set_lhs (g
, ctmp
);
4668 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
4669 tree new_lhs
= (maxval
|| cast_stmt
) ? make_ssa_name (type
) : lhs
;
4671 if (code
!= BIT_NOT_EXPR
)
4673 g2
= gimple_build_assign (new_lhs
, REALPART_EXPR
,
4674 build1 (REALPART_EXPR
, type
, ctmp
));
4675 if (maxval
|| cast_stmt
)
4677 gsi_insert_before (gsi
, g2
, GSI_SAME_STMT
);
4679 *gsi
= gsi_for_stmt (stmt
);
4682 gsi_replace (gsi
, g2
, true);
4683 if (code
== MULT_EXPR
)
4685 mul_stmts
.quick_push (g
);
4686 mul_stmts
.quick_push (g2
);
4689 g2
= gimple_build_assign (lhs
, NOP_EXPR
, new_lhs
);
4690 gsi_replace (gsi
, g2
, true);
4691 mul_stmts
.quick_push (g2
);
4695 tree ovf
= make_ssa_name (type
);
4696 g2
= gimple_build_assign (ovf
, IMAGPART_EXPR
,
4697 build1 (IMAGPART_EXPR
, type
, ctmp
));
4698 if (code
!= BIT_NOT_EXPR
)
4699 gsi_insert_after (gsi
, g2
, GSI_NEW_STMT
);
4701 gsi_insert_before (gsi
, g2
, GSI_SAME_STMT
);
4702 if (code
== MULT_EXPR
)
4703 mul_stmts
.quick_push (g2
);
4705 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, cast_lhs
? cast_lhs
: lhs
)
4707 if (is_gimple_debug (use_stmt
))
4710 gimple
*orig_use_stmt
= use_stmt
;
4711 int ovf_use
= arith_overflow_check_p (stmt
, cast_stmt
, use_stmt
,
4715 gcc_assert (code
!= BIT_NOT_EXPR
);
4718 tree use_lhs
= gimple_assign_lhs (use_stmt
);
4719 gimple_assign_set_rhs1 (use_stmt
, new_lhs
);
4720 if (useless_type_conversion_p (TREE_TYPE (use_lhs
),
4721 TREE_TYPE (new_lhs
)))
4722 gimple_assign_set_rhs_code (use_stmt
, SSA_NAME
);
4723 update_stmt (use_stmt
);
4727 if (gimple_code (use_stmt
) == GIMPLE_COND
)
4729 gcond
*cond_stmt
= as_a
<gcond
*> (use_stmt
);
4730 gimple_cond_set_lhs (cond_stmt
, ovf
);
4731 gimple_cond_set_rhs (cond_stmt
, build_int_cst (type
, 0));
4732 gimple_cond_set_code (cond_stmt
, ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
);
4736 gcc_checking_assert (is_gimple_assign (use_stmt
));
4737 if (gimple_assign_rhs_class (use_stmt
) == GIMPLE_BINARY_RHS
)
4739 if (gimple_assign_rhs_code (use_stmt
) == RSHIFT_EXPR
)
4741 g2
= gimple_build_assign (make_ssa_name (boolean_type_node
),
4742 ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
,
4743 ovf
, build_int_cst (type
, 0));
4744 gimple_stmt_iterator gsiu
= gsi_for_stmt (use_stmt
);
4745 gsi_insert_before (&gsiu
, g2
, GSI_SAME_STMT
);
4746 gimple_assign_set_rhs_with_ops (&gsiu
, NOP_EXPR
,
4747 gimple_assign_lhs (g2
));
4748 update_stmt (use_stmt
);
4750 single_imm_use (gimple_assign_lhs (use_stmt
), &use
,
4752 if (gimple_code (use_stmt
) == GIMPLE_COND
)
4754 gcond
*cond_stmt
= as_a
<gcond
*> (use_stmt
);
4755 gimple_cond_set_lhs (cond_stmt
, ovf
);
4756 gimple_cond_set_rhs (cond_stmt
, build_int_cst (type
, 0));
4760 gcc_checking_assert (is_gimple_assign (use_stmt
));
4761 if (gimple_assign_rhs_class (use_stmt
)
4762 == GIMPLE_BINARY_RHS
)
4764 gimple_assign_set_rhs1 (use_stmt
, ovf
);
4765 gimple_assign_set_rhs2 (use_stmt
,
4766 build_int_cst (type
, 0));
4768 else if (gimple_assign_cast_p (use_stmt
))
4769 gimple_assign_set_rhs1 (use_stmt
, ovf
);
4772 tree_code sc
= gimple_assign_rhs_code (use_stmt
);
4773 gcc_checking_assert (sc
== COND_EXPR
);
4774 tree cond
= gimple_assign_rhs1 (use_stmt
);
4775 cond
= build2 (TREE_CODE (cond
),
4776 boolean_type_node
, ovf
,
4777 build_int_cst (type
, 0));
4778 gimple_assign_set_rhs1 (use_stmt
, cond
);
4781 update_stmt (use_stmt
);
4782 gsi_remove (&gsiu
, true);
4783 gsiu
= gsi_for_stmt (g2
);
4784 gsi_remove (&gsiu
, true);
4789 gimple_assign_set_rhs1 (use_stmt
, ovf
);
4790 gimple_assign_set_rhs2 (use_stmt
, build_int_cst (type
, 0));
4791 gimple_assign_set_rhs_code (use_stmt
,
4793 ? NE_EXPR
: EQ_EXPR
);
4798 gcc_checking_assert (gimple_assign_rhs_code (use_stmt
)
4800 tree cond
= build2 (ovf_use
== 1 ? NE_EXPR
: EQ_EXPR
,
4801 boolean_type_node
, ovf
,
4802 build_int_cst (type
, 0));
4803 gimple_assign_set_rhs1 (use_stmt
, cond
);
4806 update_stmt (use_stmt
);
4807 if (code
== MULT_EXPR
&& use_stmt
!= orig_use_stmt
)
4809 gimple_stmt_iterator gsi2
= gsi_for_stmt (orig_use_stmt
);
4810 maybe_optimize_guarding_check (mul_stmts
, use_stmt
, orig_use_stmt
,
4814 if (single_imm_use (gimple_assign_lhs (orig_use_stmt
), &use
,
4816 && gimple_assign_cast_p (cast_stmt
))
4818 gimple_stmt_iterator gsi3
= gsi_for_stmt (cast_stmt
);
4819 gsi_remove (&gsi3
, true);
4820 release_ssa_name (gimple_assign_lhs (cast_stmt
));
4822 gsi_remove (&gsi2
, true);
4823 release_ssa_name (gimple_assign_lhs (orig_use_stmt
));
4828 gimple_stmt_iterator gsi2
= gsi_for_stmt (stmt
);
4829 gsi_remove (&gsi2
, true);
4832 gimple
*g
= gimple_build_assign (gimple_assign_lhs (add_stmt
),
4834 gsi2
= gsi_for_stmt (add_stmt
);
4835 gsi_replace (&gsi2
, g
, true);
4838 else if (code
== BIT_NOT_EXPR
)
4840 *gsi
= gsi_for_stmt (stmt
);
4841 gsi_remove (gsi
, true);
4842 release_ssa_name (lhs
);
/* Helper of match_uaddc_usubc.  Look through an integral cast
   which should preserve [0, 1] range value (unless source has
   1-bit signed type) and the cast has single use.  */

static gimple *
uaddc_cast (gimple *g)
{
  if (!gimple_assign_cast_p (g))
    return g;
  tree op = gimple_assign_rhs1 (g);
  if (TREE_CODE (op) == SSA_NAME
      && INTEGRAL_TYPE_P (TREE_TYPE (op))
      && (TYPE_PRECISION (TREE_TYPE (op)) > 1
          || TYPE_UNSIGNED (TREE_TYPE (op)))
      && has_single_use (gimple_assign_lhs (g)))
    return SSA_NAME_DEF_STMT (op);
  return g;
}
/* Helper of match_uaddc_usubc.  Look through a NE_EXPR
   comparison with 0 which also preserves [0, 1] value range.  */

static gimple *
uaddc_ne0 (gimple *g)
{
  if (is_gimple_assign (g)
      && gimple_assign_rhs_code (g) == NE_EXPR
      && integer_zerop (gimple_assign_rhs2 (g))
      && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (g)))
    return SSA_NAME_DEF_STMT (gimple_assign_rhs1 (g));
  return g;
}
/* Return true if G is {REAL,IMAG}PART_EXPR PART with SSA_NAME
   operand.  */

static bool
uaddc_is_cplxpart (gimple *g, tree_code part)
{
  return (is_gimple_assign (g)
          && gimple_assign_rhs_code (g) == part
          && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (g), 0)) == SSA_NAME);
}
/* Try to match e.g.
   _29 = .ADD_OVERFLOW (_3, _4);
   _30 = REALPART_EXPR <_29>;
   _31 = IMAGPART_EXPR <_29>;
   _32 = .ADD_OVERFLOW (_30, _38);
   _33 = REALPART_EXPR <_32>;
   _34 = IMAGPART_EXPR <_32>;
   _35 = _31 + _34;
   as
   _36 = .UADDC (_3, _4, _38);
   _33 = REALPART_EXPR <_36>;
   _35 = IMAGPART_EXPR <_36>;
   or
   _22 = .SUB_OVERFLOW (_6, _5);
   _23 = REALPART_EXPR <_22>;
   _24 = IMAGPART_EXPR <_22>;
   _25 = .SUB_OVERFLOW (_23, _37);
   _26 = REALPART_EXPR <_25>;
   _27 = IMAGPART_EXPR <_25>;
   _28 = _24 | _27;
   as
   _29 = .USUBC (_6, _5, _37);
   _26 = REALPART_EXPR <_29>;
   _28 = IMAGPART_EXPR <_29>;
   provided _38 or _37 above have [0, 1] range
   and _3, _4 and _30 or _6, _5 and _23 are unsigned
   integral types with the same precision.  Whether + or | or ^ is
   used on the IMAGPART_EXPR results doesn't matter; with one of the
   added or subtracted operands in [0, 1] range, at most one
   .ADD_OVERFLOW or .SUB_OVERFLOW will indicate overflow.  */
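/* An illustrative (hypothetical) C source that lowers to the chained
   .ADD_OVERFLOW form above is hand-written double-word addition:

     void
     add2 (unsigned long *rl, unsigned long *rh,
           unsigned long al, unsigned long ah,
           unsigned long bl, unsigned long bh)
     {
       unsigned long lo = al + bl;
       unsigned long carry = lo < al;
       *rl = lo;
       *rh = ah + bh + carry;
     }

   where the high-limb addition plus carry is what .UADDC expresses
   directly.  */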
4925 match_uaddc_usubc (gimple_stmt_iterator
*gsi
, gimple
*stmt
, tree_code code
)
4928 rhs
[0] = gimple_assign_rhs1 (stmt
);
4929 rhs
[1] = gimple_assign_rhs2 (stmt
);
4932 tree type
= TREE_TYPE (rhs
[0]);
4933 if (!INTEGRAL_TYPE_P (type
) || !TYPE_UNSIGNED (type
))
4936 auto_vec
<gimple
*, 2> temp_stmts
;
4937 if (code
!= BIT_IOR_EXPR
&& code
!= BIT_XOR_EXPR
)
4939 /* If overflow flag is ignored on the MSB limb, we can end up with
4940 the most significant limb handled as r = op1 + op2 + ovf1 + ovf2;
4941 or r = op1 - op2 - ovf1 - ovf2; or various equivalent expressions
4942 thereof. Handle those like the ovf = ovf1 + ovf2; case to recognize
4943 the limb below the MSB, but also create another .UADDC/.USUBC call
4946 First look through assignments with the same rhs code as CODE,
4947 with the exception that subtraction of a constant is canonicalized
4948 into addition of its negation. rhs[0] will be minuend for
4949 subtractions and one of addends for addition, all other assigned
4950 rhs[i] operands will be subtrahends or other addends. */
4951 while (TREE_CODE (rhs
[0]) == SSA_NAME
&& !rhs
[3])
4953 gimple
*g
= SSA_NAME_DEF_STMT (rhs
[0]);
4954 if (has_single_use (rhs
[0])
4955 && is_gimple_assign (g
)
4956 && (gimple_assign_rhs_code (g
) == code
4957 || (code
== MINUS_EXPR
4958 && gimple_assign_rhs_code (g
) == PLUS_EXPR
4959 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
)))
4961 tree r2
= gimple_assign_rhs2 (g
);
4962 if (gimple_assign_rhs_code (g
) != code
)
4964 r2
= const_unop (NEGATE_EXPR
, TREE_TYPE (r2
), r2
);
4968 rhs
[0] = gimple_assign_rhs1 (g
);
4969 tree
&r
= rhs
[2] ? rhs
[3] : rhs
[2];
4971 temp_stmts
.quick_push (g
);
4976 for (int i
= 1; i
<= 2; ++i
)
4977 while (rhs
[i
] && TREE_CODE (rhs
[i
]) == SSA_NAME
&& !rhs
[3])
4979 gimple
*g
= SSA_NAME_DEF_STMT (rhs
[i
]);
4980 if (has_single_use (rhs
[i
])
4981 && is_gimple_assign (g
)
4982 && gimple_assign_rhs_code (g
) == PLUS_EXPR
)
4984 rhs
[i
] = gimple_assign_rhs1 (g
);
4986 rhs
[3] = gimple_assign_rhs2 (g
);
4988 rhs
[2] = gimple_assign_rhs2 (g
);
4989 temp_stmts
.quick_push (g
);
4994 /* If there are just 3 addends or one minuend and two subtrahends,
4995 check for UADDC or USUBC being pattern recognized earlier.
4996 Say r = op1 + op2 + ovf1 + ovf2; where the (ovf1 + ovf2) part
4997 got pattern matched earlier as __imag__ .UADDC (arg1, arg2, arg3)
4999 if (rhs
[2] && !rhs
[3])
5001 for (int i
= (code
== MINUS_EXPR
? 1 : 0); i
< 3; ++i
)
5002 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
5004 gimple
*im
= uaddc_cast (SSA_NAME_DEF_STMT (rhs
[i
]));
5005 im
= uaddc_ne0 (im
);
5006 if (uaddc_is_cplxpart (im
, IMAGPART_EXPR
))
5008 /* We found one of the 3 addends or 2 subtrahends to be
5009 __imag__ of something, verify it is .UADDC/.USUBC. */
5010 tree rhs1
= gimple_assign_rhs1 (im
);
5011 gimple
*ovf
= SSA_NAME_DEF_STMT (TREE_OPERAND (rhs1
, 0));
5012 tree ovf_lhs
= NULL_TREE
;
5013 tree ovf_arg1
= NULL_TREE
, ovf_arg2
= NULL_TREE
;
5014 if (gimple_call_internal_p (ovf
, code
== PLUS_EXPR
5016 : IFN_SUB_OVERFLOW
))
5018 /* Or verify it is .ADD_OVERFLOW/.SUB_OVERFLOW.
5019 This is for the case of 2 chained .UADDC/.USUBC,
5020 where the first one uses 0 carry-in and the second
5021 one ignores the carry-out.
5023 _16 = .ADD_OVERFLOW (_1, _2);
5024 _17 = REALPART_EXPR <_16>;
5025 _18 = IMAGPART_EXPR <_16>;
5028 where the first 3 statements come from the lower
5029 limb addition and the last 2 from the higher limb
5030 which ignores carry-out. */
5031 ovf_lhs
= gimple_call_lhs (ovf
);
5032 tree ovf_lhs_type
= TREE_TYPE (TREE_TYPE (ovf_lhs
));
5033 ovf_arg1
= gimple_call_arg (ovf
, 0);
5034 ovf_arg2
= gimple_call_arg (ovf
, 1);
5035 /* In that case we need to punt if the types don't
5037 if (!types_compatible_p (type
, ovf_lhs_type
)
5038 || !types_compatible_p (type
, TREE_TYPE (ovf_arg1
))
5039 || !types_compatible_p (type
,
5040 TREE_TYPE (ovf_arg2
)))
5041 ovf_lhs
= NULL_TREE
;
5044 for (int i
= (code
== PLUS_EXPR
? 1 : 0);
5047 tree r
= gimple_call_arg (ovf
, i
);
5048 if (TREE_CODE (r
) != SSA_NAME
)
5050 if (uaddc_is_cplxpart (SSA_NAME_DEF_STMT (r
),
5053 /* Punt if one of the args which isn't
5054 subtracted isn't __real__; that could
5055 then prevent better match later.
5057 _3 = .ADD_OVERFLOW (_1, _2);
5058 _4 = REALPART_EXPR <_3>;
5059 _5 = IMAGPART_EXPR <_3>;
5060 _7 = .ADD_OVERFLOW (_4, _6);
5061 _8 = REALPART_EXPR <_7>;
5062 _9 = IMAGPART_EXPR <_7>;
5066 We want to match this when called on
5067 the last stmt as a pair of .UADDC calls,
5068 but without this check we could turn
5069 that prematurely on _13 = _12 + _9;
5070 stmt into .UADDC with 0 carry-in just
5071 on the second .ADD_OVERFLOW call and
5072 another replacing the _12 and _13
5074 ovf_lhs
= NULL_TREE
;
5081 use_operand_p use_p
;
5082 imm_use_iterator iter
;
5083 tree re_lhs
= NULL_TREE
;
5084 FOR_EACH_IMM_USE_FAST (use_p
, iter
, ovf_lhs
)
5086 gimple
*use_stmt
= USE_STMT (use_p
);
5087 if (is_gimple_debug (use_stmt
))
5091 if (!uaddc_is_cplxpart (use_stmt
,
5094 ovf_lhs
= NULL_TREE
;
5097 re_lhs
= gimple_assign_lhs (use_stmt
);
5099 if (ovf_lhs
&& re_lhs
)
5101 FOR_EACH_IMM_USE_FAST (use_p
, iter
, re_lhs
)
5103 gimple
*use_stmt
= USE_STMT (use_p
);
5104 if (is_gimple_debug (use_stmt
))
5107 = gimple_call_internal_fn (ovf
);
5108 /* Punt if the __real__ of lhs is used
5109 in the same .*_OVERFLOW call.
5111 _3 = .ADD_OVERFLOW (_1, _2);
5112 _4 = REALPART_EXPR <_3>;
5113 _5 = IMAGPART_EXPR <_3>;
5114 _7 = .ADD_OVERFLOW (_4, _6);
5115 _8 = REALPART_EXPR <_7>;
5116 _9 = IMAGPART_EXPR <_7>;
5120 We want to match this when called on
5121 the last stmt as a pair of .UADDC calls,
5122 but without this check we could turn
5123 that prematurely on _13 = _12 + _5;
5124 stmt into .UADDC with 0 carry-in just
5125 on the first .ADD_OVERFLOW call and
5126 another replacing the _12 and _13
5128 if (gimple_call_internal_p (use_stmt
, ifn
))
5130 ovf_lhs
= NULL_TREE
;
5138 || gimple_call_internal_p (ovf
,
5140 ? IFN_UADDC
: IFN_USUBC
))
5141 && (optab_handler (code
== PLUS_EXPR
5142 ? uaddc5_optab
: usubc5_optab
,
5144 != CODE_FOR_nothing
))
5146 /* And in that case build another .UADDC/.USUBC
5147 call for the most significand limb addition.
5148 Overflow bit is ignored here. */
5150 std::swap (rhs
[i
], rhs
[2]);
5152 = gimple_build_call_internal (code
== PLUS_EXPR
5157 tree nlhs
= make_ssa_name (build_complex_type (type
));
5158 gimple_call_set_lhs (g
, nlhs
);
5159 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5160 tree ilhs
= gimple_assign_lhs (stmt
);
5161 g
= gimple_build_assign (ilhs
, REALPART_EXPR
,
5162 build1 (REALPART_EXPR
,
5165 gsi_replace (gsi
, g
, true);
5166 /* And if it is initialized from result of __imag__
5167 of .{ADD,SUB}_OVERFLOW call, replace that
5168 call with .U{ADD,SUB}C call with the same arguments,
5169 just 0 added as third argument. This isn't strictly
5170 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5171 produce the same result, but may result in better
5172 generated code on some targets where the backend can
5173 better prepare in how the result will be used. */
5176 tree zero
= build_zero_cst (type
);
5177 g
= gimple_build_call_internal (code
== PLUS_EXPR
5182 gimple_call_set_lhs (g
, ovf_lhs
);
5183 gimple_stmt_iterator gsi2
= gsi_for_stmt (ovf
);
5184 gsi_replace (&gsi2
, g
, true);
5192 if (code
== MINUS_EXPR
&& !rhs
[2])
5194 if (code
== MINUS_EXPR
)
5195 /* Code below expects rhs[0] and rhs[1] to have the IMAGPART_EXPRs.
5196 So, for MINUS_EXPR swap the single added rhs operand (others are
5197 subtracted) to rhs[3]. */
5198 std::swap (rhs
[0], rhs
[3]);
5200 /* Walk from both operands of STMT (for +/- even sometimes from
5201 all the 4 addends or 3 subtrahends), see through casts and != 0
5202 statements which would preserve [0, 1] range of values and
5203 check which is initialized from __imag__. */
5204 gimple
*im1
= NULL
, *im2
= NULL
;
5205 for (int i
= 0; i
< (code
== MINUS_EXPR
? 3 : 4); i
++)
5206 if (rhs
[i
] && TREE_CODE (rhs
[i
]) == SSA_NAME
)
5208 gimple
*im
= uaddc_cast (SSA_NAME_DEF_STMT (rhs
[i
]));
5209 im
= uaddc_ne0 (im
);
5210 if (uaddc_is_cplxpart (im
, IMAGPART_EXPR
))
5216 std::swap (rhs
[0], rhs
[i
]);
5222 std::swap (rhs
[1], rhs
[i
]);
5227 /* If we don't find at least two, punt. */
5230 /* Check they are __imag__ of .ADD_OVERFLOW or .SUB_OVERFLOW call results,
5231 either both .ADD_OVERFLOW or both .SUB_OVERFLOW and that we have
5232 uaddc5/usubc5 named pattern for the corresponding mode. */
5234 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im1
), 0));
5236 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im2
), 0));
5238 if (!is_gimple_call (ovf1
)
5239 || !gimple_call_internal_p (ovf1
)
5240 || ((ifn
= gimple_call_internal_fn (ovf1
)) != IFN_ADD_OVERFLOW
5241 && ifn
!= IFN_SUB_OVERFLOW
)
5242 || !gimple_call_internal_p (ovf2
, ifn
)
5243 || optab_handler (ifn
== IFN_ADD_OVERFLOW
? uaddc5_optab
: usubc5_optab
,
5244 TYPE_MODE (type
)) == CODE_FOR_nothing
5246 && optab_handler (code
== PLUS_EXPR
? uaddc5_optab
: usubc5_optab
,
5247 TYPE_MODE (type
)) == CODE_FOR_nothing
))
5249 tree arg1
, arg2
, arg3
= NULL_TREE
;
5250 gimple
*re1
= NULL
, *re2
= NULL
;
5251 /* On one of the two calls, one of the .ADD_OVERFLOW/.SUB_OVERFLOW arguments
5252 should be initialized from __real__ of the other of the two calls.
5253 Though, for .SUB_OVERFLOW, it has to be the first argument, not the
5255 for (int i
= (ifn
== IFN_ADD_OVERFLOW
? 1 : 0); i
>= 0; --i
)
5256 for (gimple
*ovf
= ovf1
; ovf
; ovf
= (ovf
== ovf1
? ovf2
: NULL
))
5258 tree arg
= gimple_call_arg (ovf
, i
);
5259 if (TREE_CODE (arg
) != SSA_NAME
)
5261 re1
= SSA_NAME_DEF_STMT (arg
);
5262 if (uaddc_is_cplxpart (re1
, REALPART_EXPR
)
5263 && (SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (re1
), 0))
5264 == (ovf
== ovf1
? ovf2
: ovf1
)))
5268 /* Make sure ovf2 is the .*_OVERFLOW call with argument
5269 initialized from __real__ of ovf1. */
5270 std::swap (rhs
[0], rhs
[1]);
5271 std::swap (im1
, im2
);
5272 std::swap (ovf1
, ovf2
);
5274 arg3
= gimple_call_arg (ovf
, 1 - i
);
5281 arg1
= gimple_call_arg (ovf1
, 0);
5282 arg2
= gimple_call_arg (ovf1
, 1);
5283 if (!types_compatible_p (type
, TREE_TYPE (arg1
)))
5285 int kind
[2] = { 0, 0 };
5286 tree arg_im
[2] = { NULL_TREE
, NULL_TREE
};
5287 /* At least one of arg2 and arg3 should have type compatible
5288 with arg1/rhs[0], and the other one should have value in [0, 1]
5289 range. If both are in [0, 1] range and type compatible with
5290 arg1/rhs[0], try harder to find after looking through casts,
5291 != 0 comparisons which one is initialized to __imag__ of
5292 .{ADD,SUB}_OVERFLOW or .U{ADD,SUB}C call results. */
5293 for (int i
= 0; i
< 2; ++i
)
5295 tree arg
= i
== 0 ? arg2
: arg3
;
5296 if (types_compatible_p (type
, TREE_TYPE (arg
)))
5298 if (!INTEGRAL_TYPE_P (TREE_TYPE (arg
))
5299 || (TYPE_PRECISION (TREE_TYPE (arg
)) == 1
5300 && !TYPE_UNSIGNED (TREE_TYPE (arg
))))
5302 if (tree_zero_one_valued_p (arg
))
5304 if (TREE_CODE (arg
) == SSA_NAME
)
5306 gimple
*g
= SSA_NAME_DEF_STMT (arg
);
5307 if (gimple_assign_cast_p (g
))
5309 tree op
= gimple_assign_rhs1 (g
);
5310 if (TREE_CODE (op
) == SSA_NAME
5311 && INTEGRAL_TYPE_P (TREE_TYPE (op
)))
5312 g
= SSA_NAME_DEF_STMT (op
);
5315 if (!uaddc_is_cplxpart (g
, IMAGPART_EXPR
))
5317 arg_im
[i
] = gimple_assign_lhs (g
);
5318 g
= SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (g
), 0));
5319 if (!is_gimple_call (g
) || !gimple_call_internal_p (g
))
5321 switch (gimple_call_internal_fn (g
))
5323 case IFN_ADD_OVERFLOW
:
5324 case IFN_SUB_OVERFLOW
:
5334 /* Make arg2 the one with compatible type and arg3 the one
5335 with [0, 1] range. If both is true for both operands,
5336 prefer as arg3 result of __imag__ of some ifn. */
5337 if ((kind
[0] & 1) == 0 || ((kind
[1] & 1) != 0 && kind
[0] > kind
[1]))
5339 std::swap (arg2
, arg3
);
5340 std::swap (kind
[0], kind
[1]);
5341 std::swap (arg_im
[0], arg_im
[1]);
5343 if ((kind
[0] & 1) == 0 || (kind
[1] & 6) == 0)
5345 if (!has_single_use (gimple_assign_lhs (im1
))
5346 || !has_single_use (gimple_assign_lhs (im2
))
5347 || !has_single_use (gimple_assign_lhs (re1
))
5348 || num_imm_uses (gimple_call_lhs (ovf1
)) != 2)
5350 /* Check that ovf2's result is used in __real__ and set re2
5351 to that statement. */
5352 use_operand_p use_p
;
5353 imm_use_iterator iter
;
5354 tree lhs
= gimple_call_lhs (ovf2
);
5355 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
5357 gimple
*use_stmt
= USE_STMT (use_p
);
5358 if (is_gimple_debug (use_stmt
))
5360 if (use_stmt
== im2
)
5364 if (!uaddc_is_cplxpart (use_stmt
, REALPART_EXPR
))
5368 /* Build .UADDC/.USUBC call which will be placed before the stmt. */
5369 gimple_stmt_iterator gsi2
= gsi_for_stmt (ovf2
);
5371 if ((kind
[1] & 4) != 0 && types_compatible_p (type
, TREE_TYPE (arg_im
[1])))
5373 if ((kind
[1] & 1) == 0)
5375 if (TREE_CODE (arg3
) == INTEGER_CST
)
5376 arg3
= fold_convert (type
, arg3
);
5379 g
= gimple_build_assign (make_ssa_name (type
), NOP_EXPR
, arg3
);
5380 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5381 arg3
= gimple_assign_lhs (g
);
5384 g
= gimple_build_call_internal (ifn
== IFN_ADD_OVERFLOW
5385 ? IFN_UADDC
: IFN_USUBC
,
5386 3, arg1
, arg2
, arg3
);
5387 tree nlhs
= make_ssa_name (TREE_TYPE (lhs
));
5388 gimple_call_set_lhs (g
, nlhs
);
5389 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
5390 /* In the case where stmt is | or ^ of two overflow flags
5391 or addition of those, replace stmt with __imag__ of the above
5392 added call. In case of arg1 + arg2 + (ovf1 + ovf2) or
5393 arg1 - arg2 - (ovf1 + ovf2) just emit it before stmt. */
5394 tree ilhs
= rhs
[2] ? make_ssa_name (type
) : gimple_assign_lhs (stmt
);
5395 g
= gimple_build_assign (ilhs
, IMAGPART_EXPR
,
5396 build1 (IMAGPART_EXPR
, TREE_TYPE (ilhs
), nlhs
));
5399 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5400 /* Remove some further statements which can't be kept in the IL because
5401 they can use SSA_NAMEs whose setter is going to be removed too. */
5402 for (gimple
*g2
: temp_stmts
)
5404 gsi2
= gsi_for_stmt (g2
);
5405 gsi_remove (&gsi2
, true);
5410 gsi_replace (gsi
, g
, true);
5411 /* Remove some statements which can't be kept in the IL because they
5412 use SSA_NAME whose setter is going to be removed too. */
5414 for (int i
= 0; i
< 2; i
++)
5415 if (rhs1
== gimple_assign_lhs (im2
))
5419 g
= SSA_NAME_DEF_STMT (rhs1
);
5420 rhs1
= gimple_assign_rhs1 (g
);
5421 gsi2
= gsi_for_stmt (g
);
5422 gsi_remove (&gsi2
, true);
5425 gcc_checking_assert (rhs1
== gimple_assign_lhs (im2
));
5426 gsi2
= gsi_for_stmt (im2
);
5427 gsi_remove (&gsi2
, true);
5429 /* Replace the re2 statement with __real__ of the newly added
5430 .UADDC/.USUBC call. */
5433 gsi2
= gsi_for_stmt (re2
);
5434 tree rlhs
= gimple_assign_lhs (re2
);
5435 g
= gimple_build_assign (rlhs
, REALPART_EXPR
,
5436 build1 (REALPART_EXPR
, TREE_TYPE (rlhs
), nlhs
));
5437 gsi_replace (&gsi2
, g
, true);
5441 /* If this is the arg1 + arg2 + (ovf1 + ovf2) or
5442 arg1 - arg2 - (ovf1 + ovf2) case for the most significant limb,
5443 replace stmt with __real__ of another .UADDC/.USUBC call which
5444 handles the most significant limb. Overflow flag from this is
5446 g
= gimple_build_call_internal (code
== PLUS_EXPR
5447 ? IFN_UADDC
: IFN_USUBC
,
5448 3, rhs
[3], rhs
[2], ilhs
);
5449 nlhs
= make_ssa_name (TREE_TYPE (lhs
));
5450 gimple_call_set_lhs (g
, nlhs
);
5451 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5452 ilhs
= gimple_assign_lhs (stmt
);
5453 g
= gimple_build_assign (ilhs
, REALPART_EXPR
,
5454 build1 (REALPART_EXPR
, TREE_TYPE (ilhs
), nlhs
));
5455 gsi_replace (gsi
, g
, true);
5457 if (TREE_CODE (arg3
) == SSA_NAME
)
5459 /* When pattern recognizing the second least significant limb
5460 above (i.e. first pair of .{ADD,SUB}_OVERFLOW calls for one limb),
5461 check if the [0, 1] range argument (i.e. carry in) isn't the
5462 result of another .{ADD,SUB}_OVERFLOW call (one handling the
5463 least significant limb). Again look through casts and != 0. */
5464 gimple
*im3
= SSA_NAME_DEF_STMT (arg3
);
5465 for (int i
= 0; i
< 2; ++i
)
5467 gimple
*im4
= uaddc_cast (im3
);
5473 im3
= uaddc_ne0 (im3
);
5474 if (uaddc_is_cplxpart (im3
, IMAGPART_EXPR
))
5477 = SSA_NAME_DEF_STMT (TREE_OPERAND (gimple_assign_rhs1 (im3
), 0));
5478 if (gimple_call_internal_p (ovf3
, ifn
))
5480 lhs
= gimple_call_lhs (ovf3
);
5481 arg1
= gimple_call_arg (ovf3
, 0);
5482 arg2
= gimple_call_arg (ovf3
, 1);
5483 if (types_compatible_p (type
, TREE_TYPE (TREE_TYPE (lhs
)))
5484 && types_compatible_p (type
, TREE_TYPE (arg1
))
5485 && types_compatible_p (type
, TREE_TYPE (arg2
)))
5487 /* And if it is initialized from result of __imag__
5488 of .{ADD,SUB}_OVERFLOW call, replace that
5489 call with .U{ADD,SUB}C call with the same arguments,
5490 just 0 added as third argument. This isn't strictly
5491 necessary, .ADD_OVERFLOW (x, y) and .UADDC (x, y, 0)
5492 produce the same result, but may result in better
5493 generated code on some targets where the backend can
5494 better prepare in how the result will be used. */
5495 g
= gimple_build_call_internal (ifn
== IFN_ADD_OVERFLOW
5496 ? IFN_UADDC
: IFN_USUBC
,
5498 build_zero_cst (type
));
5499 gimple_call_set_lhs (g
, lhs
);
5500 gsi2
= gsi_for_stmt (ovf3
);
5501 gsi_replace (&gsi2
, g
, true);
/* Replace .POPCOUNT (x) == 1 or .POPCOUNT (x) != 1 with
   (x & (x - 1)) > x - 1 or (x & (x - 1)) <= x - 1 if .POPCOUNT
   isn't a direct optab.  Also handle `<=`/`>` to be
   `x & (x - 1) !=/== x`.  */
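/* For illustration (hypothetical user code), a power-of-two test such as

     int
     is_pow2 (unsigned int x)
     {
       return __builtin_popcount (x) == 1;
     }

   is classically equivalent to "x != 0 && (x & (x - 1)) == 0"; the
   replacement built below uses the closely related single comparison
   against x - 1 so no extra zero test is needed.  */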
5515 match_single_bit_test (gimple_stmt_iterator
*gsi
, gimple
*stmt
)
5518 enum tree_code code
;
5519 bool was_le
= false;
5520 if (gimple_code (stmt
) == GIMPLE_COND
)
5522 clhs
= gimple_cond_lhs (stmt
);
5523 crhs
= gimple_cond_rhs (stmt
);
5524 code
= gimple_cond_code (stmt
);
5528 clhs
= gimple_assign_rhs1 (stmt
);
5529 crhs
= gimple_assign_rhs2 (stmt
);
5530 code
= gimple_assign_rhs_code (stmt
);
5532 if (code
!= LE_EXPR
&& code
!= GT_EXPR
5533 && code
!= EQ_EXPR
&& code
!= NE_EXPR
)
5535 if (code
== LE_EXPR
|| code
== GT_EXPR
)
5537 if (TREE_CODE (clhs
) != SSA_NAME
|| !integer_onep (crhs
))
5539 gimple
*call
= SSA_NAME_DEF_STMT (clhs
);
5540 combined_fn cfn
= gimple_call_combined_fn (call
);
5548 if (!has_single_use (clhs
))
5550 tree arg
= gimple_call_arg (call
, 0);
5551 tree type
= TREE_TYPE (arg
);
5552 if (!INTEGRAL_TYPE_P (type
))
5554 bool nonzero_arg
= tree_expr_nonzero_p (arg
);
5555 if (direct_internal_fn_supported_p (IFN_POPCOUNT
, type
, OPTIMIZE_FOR_BOTH
))
5557 /* Tell expand_POPCOUNT the popcount result is only used in equality
5558 comparison with one, so that it can decide based on rtx costs. */
5559 gimple
*g
= gimple_build_call_internal (IFN_POPCOUNT
, 2, arg
,
5560 was_le
? integer_minus_one_node
5561 : (nonzero_arg
? integer_zero_node
5562 : integer_one_node
));
5563 gimple_call_set_lhs (g
, gimple_call_lhs (call
));
5564 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
5565 gsi_replace (&gsi2
, g
, true);
5568 tree argm1
= make_ssa_name (type
);
5569 gimple
*g
= gimple_build_assign (argm1
, PLUS_EXPR
, arg
,
5570 build_int_cst (type
, -1));
5571 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5572 g
= gimple_build_assign (make_ssa_name (type
),
5573 (nonzero_arg
|| was_le
) ? BIT_AND_EXPR
: BIT_XOR_EXPR
,
5575 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
5579 argm1
= build_zero_cst (type
);
5580 cmpcode
= code
== LE_EXPR
? EQ_EXPR
: NE_EXPR
;
5582 else if (nonzero_arg
)
5584 argm1
= build_zero_cst (type
);
5588 cmpcode
= code
== EQ_EXPR
? GT_EXPR
: LE_EXPR
;
5589 if (gcond
*cond
= dyn_cast
<gcond
*> (stmt
))
5591 gimple_cond_set_lhs (cond
, gimple_assign_lhs (g
));
5592 gimple_cond_set_rhs (cond
, argm1
);
5593 gimple_cond_set_code (cond
, cmpcode
);
5597 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (g
));
5598 gimple_assign_set_rhs2 (stmt
, argm1
);
5599 gimple_assign_set_rhs_code (stmt
, cmpcode
);
5602 gimple_stmt_iterator gsi2
= gsi_for_stmt (call
);
5603 gsi_remove (&gsi2
, true);
5604 release_defs (call
);
/* Return true if target has support for divmod.  */

static bool
target_supports_divmod_p (optab divmod_optab, optab div_optab, machine_mode mode)
{
  /* If target supports hardware divmod insn, use it for divmod.  */
  if (optab_handler (divmod_optab, mode) != CODE_FOR_nothing)
    return true;

  /* Check if libfunc for divmod is available.  */
  rtx libfunc = optab_libfunc (divmod_optab, mode);
  if (libfunc != NULL_RTX)
    {
      /* If optab_handler exists for div_optab, perhaps in a wider mode,
         we don't want to use the libfunc even if it exists for given mode.  */
      machine_mode div_mode;
      FOR_EACH_MODE_FROM (div_mode, mode)
        if (optab_handler (div_optab, div_mode) != CODE_FOR_nothing)
          return false;

      return targetm.expand_divmod_libfunc != NULL;
    }

  return false;
}
/* Check if stmt is candidate for divmod transform.  */

static bool
divmod_candidate_p (gassign *stmt)
{
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  machine_mode mode = TYPE_MODE (type);
  optab divmod_optab, div_optab;

  if (TYPE_UNSIGNED (type))
    {
      divmod_optab = udivmod_optab;
      div_optab = udiv_optab;
    }
  else
    {
      divmod_optab = sdivmod_optab;
      div_optab = sdiv_optab;
    }

  tree op1 = gimple_assign_rhs1 (stmt);
  tree op2 = gimple_assign_rhs2 (stmt);

  /* Disable the transform if either is a constant, since division-by-constant
     may have specialized expansion.  */
  if (CONSTANT_CLASS_P (op1))
    return false;

  if (CONSTANT_CLASS_P (op2))
    {
      if (integer_pow2p (op2))
        return false;

      if (element_precision (type) <= HOST_BITS_PER_WIDE_INT
          && element_precision (type) <= BITS_PER_WORD)
        return false;

      /* If the divisor is not power of 2 and the precision wider than
         HWI, expand_divmod punts on that, so in that case it is better
         to use divmod optab or libfunc.  Similarly if choose_multiplier
         might need pre/post shifts of BITS_PER_WORD or more.  */
    }

  /* Exclude the case where TYPE_OVERFLOW_TRAPS (type) as that should
     expand using the [su]divv optabs.  */
  if (TYPE_OVERFLOW_TRAPS (type))
    return false;

  if (!target_supports_divmod_p (divmod_optab, div_optab, mode))
    return false;

  return true;
}
/* This function looks for:
   t1 = a TRUNC_DIV_EXPR b;
   t2 = a TRUNC_MOD_EXPR b;
   and transforms it to the following sequence:
   complex_tmp = DIVMOD (a, b);
   t1 = REALPART_EXPR (complex_tmp);
   t2 = IMAGPART_EXPR (complex_tmp);
   For conditions enabling the transform see divmod_candidate_p ().

   The pass has three parts:
   1) Find top_stmt which is trunc_div or trunc_mod stmt and dominates all
      other trunc_div_expr and trunc_mod_expr stmts.
   2) Add top_stmt and all trunc_div and trunc_mod stmts dominated by top_stmt
      to stmts vector.
   3) Insert DIVMOD call just before top_stmt and update entries in
      stmts vector to use return value of DIVMOD (REALPART_EXPR for div,
      IMAGPART_EXPR for mod).  */
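/* Illustrative user code (hypothetical) that benefits from this transform:

     void
     split (int a, int b, int *q, int *r)
     {
       *q = a / b;
       *r = a % b;
     }

   After the transform both statements read their value from one
   DIVMOD (a, b) result instead of expanding the division and the modulo
   separately.  */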
5706 convert_to_divmod (gassign
*stmt
)
5708 if (stmt_can_throw_internal (cfun
, stmt
)
5709 || !divmod_candidate_p (stmt
))
5712 tree op1
= gimple_assign_rhs1 (stmt
);
5713 tree op2
= gimple_assign_rhs2 (stmt
);
5715 imm_use_iterator use_iter
;
5717 auto_vec
<gimple
*> stmts
;
5719 gimple
*top_stmt
= stmt
;
5720 basic_block top_bb
= gimple_bb (stmt
);
5722 /* Part 1: Try to set top_stmt to "topmost" stmt that dominates
5723 at-least stmt and possibly other trunc_div/trunc_mod stmts
5724 having same operands as stmt. */
5726 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, op1
)
5728 if (is_gimple_assign (use_stmt
)
5729 && (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
5730 || gimple_assign_rhs_code (use_stmt
) == TRUNC_MOD_EXPR
)
5731 && operand_equal_p (op1
, gimple_assign_rhs1 (use_stmt
), 0)
5732 && operand_equal_p (op2
, gimple_assign_rhs2 (use_stmt
), 0))
5734 if (stmt_can_throw_internal (cfun
, use_stmt
))
5737 basic_block bb
= gimple_bb (use_stmt
);
5741 if (gimple_uid (use_stmt
) < gimple_uid (top_stmt
))
5742 top_stmt
= use_stmt
;
5744 else if (dominated_by_p (CDI_DOMINATORS
, top_bb
, bb
))
5747 top_stmt
= use_stmt
;
5752 tree top_op1
= gimple_assign_rhs1 (top_stmt
);
5753 tree top_op2
= gimple_assign_rhs2 (top_stmt
);
5755 stmts
.safe_push (top_stmt
);
5756 bool div_seen
= (gimple_assign_rhs_code (top_stmt
) == TRUNC_DIV_EXPR
);
5758 /* Part 2: Add all trunc_div/trunc_mod statements domianted by top_bb
5759 to stmts vector. The 2nd loop will always add stmt to stmts vector, since
5760 gimple_bb (top_stmt) dominates gimple_bb (stmt), so the
5761 2nd loop ends up adding at-least single trunc_mod_expr stmt. */
5763 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, top_op1
)
5765 if (is_gimple_assign (use_stmt
)
5766 && (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
5767 || gimple_assign_rhs_code (use_stmt
) == TRUNC_MOD_EXPR
)
5768 && operand_equal_p (top_op1
, gimple_assign_rhs1 (use_stmt
), 0)
5769 && operand_equal_p (top_op2
, gimple_assign_rhs2 (use_stmt
), 0))
5771 if (use_stmt
== top_stmt
5772 || stmt_can_throw_internal (cfun
, use_stmt
)
5773 || !dominated_by_p (CDI_DOMINATORS
, gimple_bb (use_stmt
), top_bb
))
5776 stmts
.safe_push (use_stmt
);
5777 if (gimple_assign_rhs_code (use_stmt
) == TRUNC_DIV_EXPR
)
5785 /* Part 3: Create libcall to internal fn DIVMOD:
5786 divmod_tmp = DIVMOD (op1, op2). */
5788 gcall
*call_stmt
= gimple_build_call_internal (IFN_DIVMOD
, 2, op1
, op2
);
5789 tree res
= make_temp_ssa_name (build_complex_type (TREE_TYPE (op1
)),
5790 call_stmt
, "divmod_tmp");
5791 gimple_call_set_lhs (call_stmt
, res
);
5792 /* We rejected throwing statements above. */
5793 gimple_call_set_nothrow (call_stmt
, true);
5795 /* Insert the call before top_stmt. */
5796 gimple_stmt_iterator top_stmt_gsi
= gsi_for_stmt (top_stmt
);
5797 gsi_insert_before (&top_stmt_gsi
, call_stmt
, GSI_SAME_STMT
);
5799 widen_mul_stats
.divmod_calls_inserted
++;
5801 /* Update all statements in stmts vector:
5802 lhs = op1 TRUNC_DIV_EXPR op2 -> lhs = REALPART_EXPR<divmod_tmp>
5803 lhs = op1 TRUNC_MOD_EXPR op2 -> lhs = IMAGPART_EXPR<divmod_tmp>. */
5805 for (unsigned i
= 0; stmts
.iterate (i
, &use_stmt
); ++i
)
5809 switch (gimple_assign_rhs_code (use_stmt
))
5811 case TRUNC_DIV_EXPR
:
5812 new_rhs
= fold_build1 (REALPART_EXPR
, TREE_TYPE (op1
), res
);
5815 case TRUNC_MOD_EXPR
:
5816 new_rhs
= fold_build1 (IMAGPART_EXPR
, TREE_TYPE (op1
), res
);
5823 gimple_stmt_iterator gsi
= gsi_for_stmt (use_stmt
);
5824 gimple_assign_set_rhs_from_tree (&gsi
, new_rhs
);
5825 update_stmt (use_stmt
);
/* Process a single gimple assignment STMT, which has a RSHIFT_EXPR as
   its rhs, and try to convert it into a MULT_HIGHPART_EXPR.  The return
   value is true iff we converted the statement.  */
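/* For illustration (hypothetical source, assuming 32-bit unsigned int), the
   usual high-part multiply idiom looks like:

     unsigned int
     mulhi_u32 (unsigned int a, unsigned int b)
     {
       return ((unsigned long long) a * b) >> 32;
     }

   Earlier passes turn the widening multiply into WIDEN_MULT_EXPR; this
   function then folds the following shift into a MULT_HIGHPART_EXPR when the
   target has a highpart multiply pattern.  */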
5836 convert_mult_to_highpart (gassign
*stmt
, gimple_stmt_iterator
*gsi
)
5838 tree lhs
= gimple_assign_lhs (stmt
);
5839 tree stype
= TREE_TYPE (lhs
);
5840 tree sarg0
= gimple_assign_rhs1 (stmt
);
5841 tree sarg1
= gimple_assign_rhs2 (stmt
);
5843 if (TREE_CODE (stype
) != INTEGER_TYPE
5844 || TREE_CODE (sarg1
) != INTEGER_CST
5845 || TREE_CODE (sarg0
) != SSA_NAME
5846 || !tree_fits_uhwi_p (sarg1
)
5847 || !has_single_use (sarg0
))
5850 gassign
*def
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (sarg0
));
5854 enum tree_code mcode
= gimple_assign_rhs_code (def
);
5855 if (mcode
== NOP_EXPR
)
5857 tree tmp
= gimple_assign_rhs1 (def
);
5858 if (TREE_CODE (tmp
) != SSA_NAME
|| !has_single_use (tmp
))
5860 def
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (tmp
));
5863 mcode
= gimple_assign_rhs_code (def
);
5866 if (mcode
!= WIDEN_MULT_EXPR
5867 || gimple_bb (def
) != gimple_bb (stmt
))
5869 tree mtype
= TREE_TYPE (gimple_assign_lhs (def
));
5870 if (TREE_CODE (mtype
) != INTEGER_TYPE
5871 || TYPE_PRECISION (mtype
) != TYPE_PRECISION (stype
))
5874 tree mop1
= gimple_assign_rhs1 (def
);
5875 tree mop2
= gimple_assign_rhs2 (def
);
5876 tree optype
= TREE_TYPE (mop1
);
5877 bool unsignedp
= TYPE_UNSIGNED (optype
);
5878 unsigned int prec
= TYPE_PRECISION (optype
);
5880 if (unsignedp
!= TYPE_UNSIGNED (mtype
)
5881 || TYPE_PRECISION (mtype
) != 2 * prec
)
5884 unsigned HOST_WIDE_INT bits
= tree_to_uhwi (sarg1
);
5885 if (bits
< prec
|| bits
>= 2 * prec
)
5888 /* For the time being, require operands to have the same sign. */
5889 if (unsignedp
!= TYPE_UNSIGNED (TREE_TYPE (mop2
)))
5892 machine_mode mode
= TYPE_MODE (optype
);
5893 optab tab
= unsignedp
? umul_highpart_optab
: smul_highpart_optab
;
5894 if (optab_handler (tab
, mode
) == CODE_FOR_nothing
)
5897 location_t loc
= gimple_location (stmt
);
5898 tree highpart1
= build_and_insert_binop (gsi
, loc
, "highparttmp",
5899 MULT_HIGHPART_EXPR
, mop1
, mop2
);
5900 tree highpart2
= highpart1
;
5901 tree ntype
= optype
;
5903 if (TYPE_UNSIGNED (stype
) != TYPE_UNSIGNED (optype
))
5905 ntype
= TYPE_UNSIGNED (stype
) ? unsigned_type_for (optype
)
5906 : signed_type_for (optype
);
5907 highpart2
= build_and_insert_cast (gsi
, loc
, ntype
, highpart1
);
5910 highpart2
= build_and_insert_binop (gsi
, loc
, "highparttmp",
5911 RSHIFT_EXPR
, highpart2
,
5912 build_int_cst (ntype
, bits
- prec
));
5914 gassign
*new_stmt
= gimple_build_assign (lhs
, NOP_EXPR
, highpart2
);
5915 gsi_replace (gsi
, new_stmt
, true);
5917 widen_mul_stats
.highpart_mults_inserted
++;
/* If target has spaceship<MODE>3 expander, pattern recognize
   <bb 2> [local count: 1073741824]:
   if (a_2(D) == b_3(D))
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (a_2(D) < b_3(D))
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (a_2(D) > b_3(D))
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:
   ...

   <bb 6> [local count: 1073741824]:
   ...
   into
   <bb 2> [local count: 1073741824]:
   _1 = .SPACESHIP (a_2(D), b_3(D), 0);
   if (_1 == 0)
     goto <bb 6>; [34.00%]
   else
     goto <bb 3>; [66.00%]

   <bb 3> [local count: 708669601]:
   if (_1 == -1)
     goto <bb 6>; [1.04%]
   else
     goto <bb 4>; [98.96%]

   <bb 4> [local count: 701299439]:
   if (_1 == 1)
     goto <bb 5>; [48.89%]
   else
     goto <bb 6>; [51.11%]

   <bb 5> [local count: 342865295]:
   ...

   <bb 6> [local count: 1073741824]:
   ...
   so that the backend can emit optimal comparison and
   conditional jump sequence.  If the
   <bb 6> [local count: 1073741824]:
   above has a single PHI like:
   # _27 = PHI<0(2), -1(3), 2(4), 1(5)>
   then replace it with effectively
   _1 = .SPACESHIP (a_2(D), b_3(D), 1);
   and cast _1 to the type of the PHI result.  */
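/* Illustrative C++ source (hypothetical) that produces the shape above:

     #include <compare>

     std::strong_ordering
     cmp (long a, long b)
     {
       return a <=> b;
     }

   The ==/</> comparison ladder the front end emits for operator<=> is what
   this routine collapses into a single .SPACESHIP call when the target
   defines the spaceship<MODE>3 expander.  */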
5976 optimize_spaceship (gcond
*stmt
)
5978 enum tree_code code
= gimple_cond_code (stmt
);
5979 if (code
!= EQ_EXPR
&& code
!= NE_EXPR
)
5981 tree arg1
= gimple_cond_lhs (stmt
);
5982 tree arg2
= gimple_cond_rhs (stmt
);
5983 if ((!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1
))
5984 && !INTEGRAL_TYPE_P (TREE_TYPE (arg1
)))
5985 || optab_handler (spaceship_optab
,
5986 TYPE_MODE (TREE_TYPE (arg1
))) == CODE_FOR_nothing
5987 || operand_equal_p (arg1
, arg2
, 0))
5990 basic_block bb0
= gimple_bb (stmt
), bb1
, bb2
= NULL
;
5991 edge em1
= NULL
, e1
= NULL
, e2
= NULL
;
5992 bb1
= EDGE_SUCC (bb0
, 1)->dest
;
5993 if (((EDGE_SUCC (bb0
, 0)->flags
& EDGE_TRUE_VALUE
) != 0) ^ (code
== EQ_EXPR
))
5994 bb1
= EDGE_SUCC (bb0
, 0)->dest
;
5996 gcond
*g
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb1
));
5998 || !single_pred_p (bb1
)
5999 || (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
6000 ? !operand_equal_p (gimple_cond_rhs (g
), arg2
, 0)
6001 : (!operand_equal_p (gimple_cond_lhs (g
), arg2
, 0)
6002 || !operand_equal_p (gimple_cond_rhs (g
), arg1
, 0)))
6003 || !cond_only_block_p (bb1
))
6006 enum tree_code ccode
= (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
6007 ? LT_EXPR
: GT_EXPR
);
6008 switch (gimple_cond_code (g
))
6015 ccode
= ccode
== LT_EXPR
? GT_EXPR
: LT_EXPR
;
6021 for (int i
= 0; i
< 2; ++i
)
6023 /* With NaNs, </<=/>/>= are false, so we need to look for the
6024 third comparison on the false edge from whatever non-equality
6025 comparison the second comparison is. */
6026 if (HONOR_NANS (TREE_TYPE (arg1
))
6027 && (EDGE_SUCC (bb1
, i
)->flags
& EDGE_TRUE_VALUE
) != 0)
6030 bb2
= EDGE_SUCC (bb1
, i
)->dest
;
6031 g
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb2
));
6033 || !single_pred_p (bb2
)
6034 || (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0)
6035 ? !operand_equal_p (gimple_cond_rhs (g
), arg2
, 0)
6036 : (!operand_equal_p (gimple_cond_lhs (g
), arg2
, 0)
6037 || !operand_equal_p (gimple_cond_rhs (g
), arg1
, 0)))
6038 || !cond_only_block_p (bb2
)
6039 || EDGE_SUCC (bb2
, 0)->dest
== EDGE_SUCC (bb2
, 1)->dest
)
6042 enum tree_code ccode2
6043 = (operand_equal_p (gimple_cond_lhs (g
), arg1
, 0) ? LT_EXPR
: GT_EXPR
);
6044 switch (gimple_cond_code (g
))
6051 ccode2
= ccode2
== LT_EXPR
? GT_EXPR
: LT_EXPR
;
6056 if (HONOR_NANS (TREE_TYPE (arg1
)) && ccode
== ccode2
)
6059 if ((ccode
== LT_EXPR
)
6060 ^ ((EDGE_SUCC (bb1
, i
)->flags
& EDGE_TRUE_VALUE
) != 0))
6062 em1
= EDGE_SUCC (bb1
, 1 - i
);
6063 e1
= EDGE_SUCC (bb2
, 0);
6064 e2
= EDGE_SUCC (bb2
, 1);
6065 if ((ccode2
== LT_EXPR
) ^ ((e1
->flags
& EDGE_TRUE_VALUE
) == 0))
6070 e1
= EDGE_SUCC (bb1
, 1 - i
);
6071 em1
= EDGE_SUCC (bb2
, 0);
6072 e2
= EDGE_SUCC (bb2
, 1);
6073 if ((ccode2
!= LT_EXPR
) ^ ((em1
->flags
& EDGE_TRUE_VALUE
) == 0))
6074 std::swap (em1
, e2
);
6081 if ((ccode
== LT_EXPR
)
6082 ^ ((EDGE_SUCC (bb1
, 0)->flags
& EDGE_TRUE_VALUE
) != 0))
6084 em1
= EDGE_SUCC (bb1
, 1);
6085 e1
= EDGE_SUCC (bb1
, 0);
6086 e2
= (e1
->flags
& EDGE_TRUE_VALUE
) ? em1
: e1
;
6090 em1
= EDGE_SUCC (bb1
, 0);
6091 e1
= EDGE_SUCC (bb1
, 1);
6092 e2
= (e1
->flags
& EDGE_TRUE_VALUE
) ? em1
: e1
;
6096 /* Check if there is a single bb into which all failed conditions
6097 jump to (perhaps through an empty block) and if it results in
6098 a single integral PHI which just sets it to -1, 0, 1, X
6099 (or -1, 0, 1 when NaNs can't happen). In that case use 1 rather
6100 than 0 as last .SPACESHIP argument to tell backends it might
6101 consider different code generation and just cast the result
6102 of .SPACESHIP to the PHI result. X above is some value
6103 other than -1, 0, 1, for libstdc++ 2, for libc++ -127. */
6104 tree arg3
= integer_zero_node
;
6105 edge e
= EDGE_SUCC (bb0
, 0);
6107 e
= EDGE_SUCC (bb0
, 1);
6108 basic_block bbp
= e
->dest
;
6110 for (gphi_iterator psi
= gsi_start_phis (bbp
);
6111 !gsi_end_p (psi
); gsi_next (&psi
))
6113 gphi
*gp
= psi
.phi ();
6114 tree res
= gimple_phi_result (gp
);
6117 || virtual_operand_p (res
)
6118 || !INTEGRAL_TYPE_P (TREE_TYPE (res
))
6119 || TYPE_PRECISION (TREE_TYPE (res
)) < 2)
6127 && integer_zerop (gimple_phi_arg_def_from_edge (phi
, e
))
6128 && EDGE_COUNT (bbp
->preds
) == (HONOR_NANS (TREE_TYPE (arg1
)) ? 4 : 3))
6130 HOST_WIDE_INT argval
= SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1
)) ? 2 : -1;
6131 for (unsigned i
= 0; phi
&& i
< EDGE_COUNT (bbp
->preds
) - 1; ++i
)
6133 edge e3
= i
== 0 ? e1
: i
== 1 ? em1
: e2
;
6134 if (e3
->dest
!= bbp
)
6136 if (!empty_block_p (e3
->dest
)
6137 || !single_succ_p (e3
->dest
)
6138 || single_succ (e3
->dest
) != bbp
)
6143 e3
= single_succ_edge (e3
->dest
);
6145 tree a
= gimple_phi_arg_def_from_edge (phi
, e3
);
6146 if (TREE_CODE (a
) != INTEGER_CST
6147 || (i
== 0 && !integer_onep (a
))
6148 || (i
== 1 && !integer_all_onesp (a
)))
6155 tree minv
= TYPE_MIN_VALUE (signed_char_type_node
);
6156 tree maxv
= TYPE_MAX_VALUE (signed_char_type_node
);
6157 widest_int w
= widest_int::from (wi::to_wide (a
), SIGNED
);
6158 if ((w
>= -1 && w
<= 1)
6159 || w
< wi::to_widest (minv
)
6160 || w
> wi::to_widest (maxv
))
6165 argval
= w
.to_shwi ();
6169 arg3
= build_int_cst (integer_type_node
,
6170 TYPE_UNSIGNED (TREE_TYPE (arg1
)) ? 1 : argval
);
6173 /* For integral <=> comparisons only use .SPACESHIP if it is turned
6174 into an integer (-1, 0, 1). */
6175 if (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (arg1
)) && arg3
== integer_zero_node
)
6178 gcall
*gc
= gimple_build_call_internal (IFN_SPACESHIP
, 3, arg1
, arg2
, arg3
);
6179 tree lhs
= make_ssa_name (integer_type_node
);
6180 gimple_call_set_lhs (gc
, lhs
);
6181 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
6182 gsi_insert_before (&gsi
, gc
, GSI_SAME_STMT
);
6184 wide_int wmin
= wi::minus_one (TYPE_PRECISION (integer_type_node
));
6185 wide_int wmax
= wi::one (TYPE_PRECISION (integer_type_node
));
6186 if (HONOR_NANS (TREE_TYPE (arg1
)))
6188 if (arg3
== integer_zero_node
)
6189 wmax
= wi::two (TYPE_PRECISION (integer_type_node
));
6190 else if (tree_int_cst_sgn (arg3
) < 0)
6191 wmin
= wi::to_wide (arg3
);
6193 wmax
= wi::to_wide (arg3
);
6195 int_range
<1> vr (TREE_TYPE (lhs
), wmin
, wmax
);
6196 set_range_info (lhs
, vr
);
6198 if (arg3
!= integer_zero_node
)
6200 tree type
= TREE_TYPE (gimple_phi_result (phi
));
6201 if (!useless_type_conversion_p (type
, integer_type_node
))
6203 tree tem
= make_ssa_name (type
);
6204 gimple
*gcv
= gimple_build_assign (tem
, NOP_EXPR
, lhs
);
6205 gsi_insert_before (&gsi
, gcv
, GSI_SAME_STMT
);
6208 SET_PHI_ARG_DEF_ON_EDGE (phi
, e
, lhs
);
6209 gimple_cond_set_lhs (stmt
, boolean_false_node
);
6210 gimple_cond_set_rhs (stmt
, boolean_false_node
);
6211 gimple_cond_set_code (stmt
, (e
->flags
& EDGE_TRUE_VALUE
)
6212 ? EQ_EXPR
: NE_EXPR
);
6217 gimple_cond_set_lhs (stmt
, lhs
);
6218 gimple_cond_set_rhs (stmt
, integer_zero_node
);
6221 gcond
*cond
= as_a
<gcond
*> (*gsi_last_bb (bb1
));
6222 gimple_cond_set_lhs (cond
, lhs
);
6223 if (em1
->src
== bb1
&& e2
!= em1
)
6225 gimple_cond_set_rhs (cond
, integer_minus_one_node
);
6226 gimple_cond_set_code (cond
, (em1
->flags
& EDGE_TRUE_VALUE
)
6227 ? EQ_EXPR
: NE_EXPR
);
6231 gcc_assert (e1
->src
== bb1
&& e2
!= e1
);
6232 gimple_cond_set_rhs (cond
, integer_one_node
);
6233 gimple_cond_set_code (cond
, (e1
->flags
& EDGE_TRUE_VALUE
)
6234 ? EQ_EXPR
: NE_EXPR
);
6238 if (e2
!= e1
&& e2
!= em1
)
6240 cond
= as_a
<gcond
*> (*gsi_last_bb (bb2
));
6241 gimple_cond_set_lhs (cond
, lhs
);
6242 if (em1
->src
== bb2
)
6243 gimple_cond_set_rhs (cond
, integer_minus_one_node
);
6246 gcc_assert (e1
->src
== bb2
);
6247 gimple_cond_set_rhs (cond
, integer_one_node
);
6249 gimple_cond_set_code (cond
,
6250 (e2
->flags
& EDGE_TRUE_VALUE
) ? NE_EXPR
: EQ_EXPR
);
/* Find integer multiplications where the operands are extended from
   smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
   or MULT_HIGHPART_EXPR where appropriate.  */

namespace {

const pass_data pass_data_optimize_widening_mul =
{
  GIMPLE_PASS, /* type */
  "widening_mul", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_WIDEN_MUL, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_optimize_widening_mul : public gimple_opt_pass
{
public:
  pass_optimize_widening_mul (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_widening_mul, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
  {
    return flag_expensive_optimizations && optimize;
  }

  unsigned int execute (function *) final override;

}; // class pass_optimize_widening_mul
/* Walker class to perform the transformation in reverse dominance order.  */

class math_opts_dom_walker : public dom_walker
{
public:
  /* Constructor, CFG_CHANGED is a pointer to a boolean flag that will be set
     if walking modifies the CFG.  */

  math_opts_dom_walker (bool *cfg_changed_p)
    : dom_walker (CDI_DOMINATORS), m_last_result_set (),
      m_cfg_changed_p (cfg_changed_p) {}

  /* The actual actions performed in the walk.  */

  void after_dom_children (basic_block) final override;

  /* Set of results of chains of multiply and add statement combinations that
     were not transformed into FMAs because of active deferring.  */
  hash_set<tree> m_last_result_set;

  /* Pointer to a flag of the user that needs to be set if CFG has been
     modified.  */
  bool *m_cfg_changed_p;
};
6318 math_opts_dom_walker::after_dom_children (basic_block bb
)
6320 gimple_stmt_iterator gsi
;
6322 fma_deferring_state
fma_state (param_avoid_fma_max_bits
> 0);
6324 for (gphi_iterator psi_next
, psi
= gsi_start_phis (bb
); !gsi_end_p (psi
);
6328 gsi_next (&psi_next
);
6330 gimple_stmt_iterator gsi
= gsi_after_labels (bb
);
6331 gphi
*phi
= psi
.phi ();
6333 if (match_saturation_add (&gsi
, phi
)
6334 || match_saturation_sub (&gsi
, phi
)
6335 || match_saturation_trunc (&gsi
, phi
))
6336 remove_phi_node (&psi
, /* release_lhs_p */ false);
  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
    {
      gimple *stmt = gsi_stmt (gsi);
      enum tree_code code;

      if (is_gimple_assign (stmt))
        {
          code = gimple_assign_rhs_code (stmt);
          switch (code)
            {
            case MULT_EXPR:
              if (!convert_mult_to_widen (stmt, &gsi)
                  && !convert_expand_mult_copysign (stmt, &gsi)
                  && convert_mult_to_fma (stmt,
                                          gimple_assign_rhs1 (stmt),
                                          gimple_assign_rhs2 (stmt),
                                          &fma_state))
                {
                  gsi_remove (&gsi, true);
                  release_defs (stmt);
                  continue;
                }
              match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
              match_unsigned_saturation_sub (&gsi, as_a <gassign *> (stmt));
              break;
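            /* Illustrative sketch only: with a fused multiply-add target and
               contraction enabled (e.g. -ffp-contract=fast),

                 double
                 muladd (double a, double b, double c)
                 {
                   return a * b + c;
                 }

               has the MULT_EXPR feeding the addition replaced by a single
               .FMA internal call via convert_mult_to_fma above.  */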
            case PLUS_EXPR:
              match_unsigned_saturation_add (&gsi, as_a <gassign *> (stmt));
              match_unsigned_saturation_sub (&gsi, as_a <gassign *> (stmt));
              /* fall-through  */
            case MINUS_EXPR:
              if (!convert_plusminus_to_widen (&gsi, stmt, code))
                {
                  match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
                  if (gsi_stmt (gsi) == stmt)
                    match_uaddc_usubc (&gsi, stmt, code);
                }
              break;

            case BIT_NOT_EXPR:
              if (match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p))
                continue;
              break;

            case TRUNC_MOD_EXPR:
              convert_to_divmod (as_a <gassign *> (stmt));
              break;
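            /* Illustrative sketch only: when both the quotient and the
               remainder of the same division are live, e.g.

                 void
                 qr (int x, int y, int *q, int *r)
                 {
                   *q = x / y;
                   *r = x % y;
                 }

               convert_to_divmod merges the TRUNC_DIV_EXPR and TRUNC_MOD_EXPR
               into one DIVMOD internal call (for instance a single
               __divmodsi4 libcall) on targets where that is cheaper.  */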
            case RSHIFT_EXPR:
              convert_mult_to_highpart (as_a <gassign *> (stmt), &gsi);
              break;

            case BIT_IOR_EXPR:
              match_unsigned_saturation_add (&gsi, as_a <gassign *> (stmt));
              match_unsigned_saturation_trunc (&gsi, as_a <gassign *> (stmt));
              /* fall-through  */
            case BIT_XOR_EXPR:
              match_uaddc_usubc (&gsi, stmt, code);
              break;

            case EQ_EXPR:
            case NE_EXPR:
              match_single_bit_test (&gsi, stmt);
              break;

            case COND_EXPR:
              match_unsigned_saturation_sub (&gsi, as_a <gassign *> (stmt));
              break;

            case NOP_EXPR:
              match_unsigned_saturation_trunc (&gsi, as_a <gassign *> (stmt));
              break;
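            /* Illustrative sketch only: a saturating narrowing such as

                 unsigned char
                 sat_trunc (unsigned short x)
                 {
                   return x > 255 ? 255 : x;
                 }

               can be rewritten into a single .SAT_TRUNC internal call when
               the target provides a matching saturating-truncation
               pattern.  */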
            default:
              break;
            }
        }
      else if (is_gimple_call (stmt))
        {
          switch (gimple_call_combined_fn (stmt))
            {
            case CFN_POW:
              if (gimple_call_lhs (stmt)
                  && TREE_CODE (gimple_call_arg (stmt, 1)) == REAL_CST
                  && real_equal (&TREE_REAL_CST (gimple_call_arg (stmt, 1)),
                                 &dconst2)
                  && convert_mult_to_fma (stmt,
                                          gimple_call_arg (stmt, 0),
                                          gimple_call_arg (stmt, 0),
                                          &fma_state))
                {
                  unlink_stmt_vdef (stmt);
                  if (gsi_remove (&gsi, true)
                      && gimple_purge_dead_eh_edges (bb))
                    *m_cfg_changed_p = true;
                  release_defs (stmt);
                  continue;
                }
              break;
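            /* Illustrative sketch only: with -ffast-math a squared term
               feeding an addition, e.g.

                 double
                 sq_plus (double x, double y)
                 {
                   return __builtin_pow (x, 2.0) + y;
                 }

               is handled here by treating pow (x, 2.0) as x * x, which
               convert_mult_to_fma can then fuse into .FMA (x, x, y).  */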
            case CFN_COND_MUL:
              if (convert_mult_to_fma (stmt,
                                       gimple_call_arg (stmt, 1),
                                       gimple_call_arg (stmt, 2),
                                       &fma_state,
                                       gimple_call_arg (stmt, 0)))
                {
                  gsi_remove (&gsi, true);
                  release_defs (stmt);
                  continue;
                }
              break;

            case CFN_COND_LEN_MUL:
              if (convert_mult_to_fma (stmt,
                                       gimple_call_arg (stmt, 1),
                                       gimple_call_arg (stmt, 2),
                                       &fma_state,
                                       gimple_call_arg (stmt, 0),
                                       gimple_call_arg (stmt, 4),
                                       gimple_call_arg (stmt, 5)))
                {
                  gsi_remove (&gsi, true);
                  release_defs (stmt);
                  continue;
                }
              break;

            case CFN_LAST:
              cancel_fma_deferring (&fma_state);
              break;

            default:
              break;
            }
        }
      else if (gimple_code (stmt) == GIMPLE_COND)
        {
          match_single_bit_test (&gsi, stmt);
          optimize_spaceship (as_a <gcond *> (stmt));
        }
      gsi_next (&gsi);
    }

  if (fma_state.m_deferring_p
      && fma_state.m_initial_phi)
    {
      gcc_checking_assert (fma_state.m_last_result);
      if (!last_fma_candidate_feeds_initial_phi (&fma_state,
                                                 &m_last_result_set))
        cancel_fma_deferring (&fma_state);
      else
        m_last_result_set.add (fma_state.m_last_result);
    }
}
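/* Illustrative sketch only: the FMA deferring handled above targets loop
   accumulators such as

     double
     dot (const double *a, const double *b, int n)
     {
       double acc = 0.0;
       for (int i = 0; i < n; i++)
         acc += a[i] * b[i];
       return acc;
     }

   where each candidate FMA would feed the accumulator PHI; on targets that
   set param_avoid_fma_max_bits for the mode, such chains are kept as
   separate multiply and add statements instead.  */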
unsigned int
pass_optimize_widening_mul::execute (function *fun)
{
  bool cfg_changed = false;

  memset (&widen_mul_stats, 0, sizeof (widen_mul_stats));
  calculate_dominance_info (CDI_DOMINATORS);
  renumber_gimple_stmt_uids (cfun);

  math_opts_dom_walker (&cfg_changed).walk (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  statistics_counter_event (fun, "widening multiplications inserted",
                            widen_mul_stats.widen_mults_inserted);
  statistics_counter_event (fun, "widening maccs inserted",
                            widen_mul_stats.maccs_inserted);
  statistics_counter_event (fun, "fused multiply-adds inserted",
                            widen_mul_stats.fmas_inserted);
  statistics_counter_event (fun, "divmod calls inserted",
                            widen_mul_stats.divmod_calls_inserted);
  statistics_counter_event (fun, "highpart multiplications inserted",
                            widen_mul_stats.highpart_mults_inserted);

  return cfg_changed ? TODO_cleanup_cfg : 0;
}
} // anon namespace

gimple_opt_pass *
make_pass_optimize_widening_mul (gcc::context *ctxt)
{
  return new pass_optimize_widening_mul (ctxt);
}