/* Reassociation for trees.
   Copyright (C) 2005-2024 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "tree-ssa-loop.h"
#include "langhooks.h"
#include "case-cfn-macros.h"
#include "tree-ssa-reassoc.h"
#include "tree-ssa-math-opts.h"
#include "gimple-range.h"
#include "internal-fn.h"
/*  This is a simple global reassociation pass.  It is, in part, based
    on the LLVM pass of the same name (They do some things more/less
    than we do, in different orders, etc).

    It consists of five steps:

    1. Breaking up subtract operations into addition + negate, where
    it would promote the reassociation of adds.

    2. Left linearization of the expression trees, so that (A+B)+(C+D)
    becomes (((A+B)+C)+D), which is easier for us to rewrite later.
    During linearization, we place the operands of the binary
    expressions into a vector of operand_entry_*

    3. Optimization of the operand lists, eliminating things like a +
    -a, a & a, etc.

    3a. Combine repeated factors with the same occurrence counts
    into a __builtin_powi call that will later be optimized into
    an optimal number of multiplies.

    4. Rewrite the expression trees we linearized and optimized so
    they are in proper rank order.

    5. Repropagate negates, as nothing else will clean it up ATM.

    A bit of theory on #4, since nobody seems to write anything down
    about why it makes sense to do it the way they do it:

    We could do this much nicer theoretically, but don't (for reasons
    explained after how to do it theoretically nice :P).

    In order to promote the most redundancy elimination, you want
    binary expressions whose operands are the same rank (or
    preferably, the same value) exposed to the redundancy eliminator,
    for possible elimination.

    So the way to do this if we really cared, is to build the new op
    tree from the leaves to the roots, merging as you go, and putting the
    new op on the end of the worklist, until you are left with one
    thing on the worklist.

    IE if you have to rewrite the following set of operands (listed with
    rank in parentheses), with opcode PLUS_EXPR:

    a (1),  b (1),  c (1),  d (2), e (2)

    We start with our merge worklist empty, and the ops list with all of
    those on it.

    You want to first merge all leaves of the same rank, as much as
    possible.

    So first build a binary op of

    mergetmp = a + b, and put "mergetmp" on the merge worklist.

    Because there is no three operand form of PLUS_EXPR, c is not going to
    be exposed to redundancy elimination as a rank 1 operand.

    So you might as well throw it on the merge worklist (you could also
    consider it to now be a rank two operand, and merge it with d and e,
    but in this case, you then have evicted e from a binary op.  So at
    least in this situation, you can't win.)

    Then build a binary op of d + e
    mergetmp2 = d + e

    and put mergetmp2 on the merge worklist.

    so merge worklist = {mergetmp, c, mergetmp2}

    Continue building binary ops of these operations until you have only
    one operation left on the worklist.

    So we have

    build binary op
    mergetmp3 = mergetmp + c

    worklist = {mergetmp2, mergetmp3}

    mergetmp4 = mergetmp2 + mergetmp3

    worklist = {mergetmp4}

    because we have one operation left, we can now just set the original
    statement equal to the result of that operation.

    This will at least expose a + b and d + e to redundancy elimination
    as binary operations.

    For extra points, you can reuse the old statements to build the
    mergetmps, since you shouldn't run out.

    So why don't we do this?

    Because it's expensive, and rarely will help.  Most trees we are
    reassociating have 3 or less ops.  If they have 2 ops, they already
    will be written into a nice single binary op.  If you have 3 ops, a
    single simple check suffices to tell you whether the first two are of the
    same rank.  If so, you know to order it

    mergetmp = op1 + op2
    newstmt = mergetmp + op3

    instead of
    mergetmp = op2 + op3
    newstmt = mergetmp + op1

    If all three are of the same rank, you can't expose them all in a
    single binary operator anyway, so the above is *still* the best you
    can do.

    Thus, this is what we do.  When we have three ops left, we check to see
    what order to put them in, and call it a day.  As a nod to vector sum
    reduction, we check if any of the ops are really a phi node that is a
    destructive update for the associating op, and keep the destructive
    update together for vector sum reduction recognition.  */
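/* A small worked sketch of the steps above (illustrative only; the exact
   result depends on the ranks computed for the operands):

     tmp = a - b + c;

   Step 1 rewrites the subtract as a + (-b), step 2 linearizes the chain
   into the operand vector {a, -b, c}, step 3 would eliminate pairs such as
   a + (-a) if they were present, step 4 re-emits the vector as left-linear
   binary operations in rank order, and step 5 repropagates any remaining
   negate back into a subtract where that is cheaper.  */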
/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Enable biasing ranks of loop accumulators.  We don't want this before
   vectorization, since it interferes with reduction chains.  */
static bool reassoc_bias_loop_carried_phi_ranks_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;


static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;
/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
   biased.  */
static auto_bitmap biased_names;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
/* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
   operands to the STMT's left-hand side.  The goal is to preserve bias in code
   like this:

     x_1 = phi(x_0, x_2)
     a = x_1 | 1
     b = a ^ 2
     .MEM = b
     c = b + d
     x_2 = c + e

   That is, we need to preserve bias along single-use chains originating from
   loop-carried phis.  Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
   uses, because only they participate in rank propagation.  */

static bool
propagate_bias_p (gimple *stmt)
{
  use_operand_p use;
  imm_use_iterator use_iter;
  gimple *single_use_stmt = NULL;

  if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
    return false;

  FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
    {
      gimple *current_use_stmt = USE_STMT (use);

      if (is_gimple_assign (current_use_stmt)
          && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
        {
          if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
            return false;
          single_use_stmt = current_use_stmt;
        }
    }

  if (single_use_stmt == NULL)
    return false;

  if (gimple_bb (stmt)->loop_father
      != gimple_bb (single_use_stmt)->loop_father)
    return false;

  return true;
}
/* Rank assigned to a phi statement.  If STMT is a loop-carried phi of
   an innermost loop, and the phi has only a single use which is inside
   the loop, then the rank is the block rank of the loop latch plus an
   extra bias for the loop-carried dependence.  This causes expressions
   calculated into an accumulator variable to be independent for each
   iteration of the loop.  If STMT is some other phi, the rank is the
   block rank of its containing block.  */

static int64_t
phi_rank (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);
  class loop *father = bb->loop_father;
  tree res;
  unsigned i;
  use_operand_p use;
  gimple *use_stmt;

  if (!reassoc_bias_loop_carried_phi_ranks_p)
    return bb_rank[bb->index];

  /* We only care about real loops (those with a latch).  */
  if (!father->latch)
    return bb_rank[bb->index];

  /* Interesting phis must be in headers of innermost loops.  */
  if (bb != father->header
      || father->inner)
    return bb_rank[bb->index];

  /* Ignore virtual SSA_NAMEs.  */
  res = gimple_phi_result (stmt);
  if (virtual_operand_p (res))
    return bb_rank[bb->index];

  /* The phi definition must have a single use, and that use must be
     within the loop.  Otherwise this isn't an accumulator pattern.  */
  if (!single_imm_use (res, &use, &use_stmt)
      || gimple_bb (use_stmt)->loop_father != father)
    return bb_rank[bb->index];

  /* Look for phi arguments from within the loop.  If found, bias this phi.  */
  for (i = 0; i < gimple_phi_num_args (stmt); i++)
    {
      tree arg = gimple_phi_arg_def (stmt, i);
      if (TREE_CODE (arg) == SSA_NAME
          && !SSA_NAME_IS_DEFAULT_DEF (arg))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
          if (gimple_bb (def_stmt)->loop_father == father)
            return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
        }
    }

  /* Must be an uninteresting phi.  */
  return bb_rank[bb->index];
}
/* Return the maximum of RANK and the rank that should be propagated
   from expression OP.  For most operands, this is just the rank of OP.
   For loop-carried phis, the value is zero to avoid undoing the bias
   in favor of the phi.  */

static int64_t
propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
{
  int64_t op_rank;

  op_rank = get_rank (op);

  /* Check whether op is biased after the get_rank () call, since it might have
     updated biased_names.  */
  if (TREE_CODE (op) == SSA_NAME
      && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
    {
      if (maybe_biased_p == NULL)
        return rank;
      *maybe_biased_p = true;
    }

  return MAX (rank, op_rank);
}
/* Look up the operand rank structure for expression E.  */

static inline int64_t
find_operand_rank (tree e)
{
  int64_t *slot = operand_rank->get (e);
  return slot ? *slot : -1;
}
/* Insert {E,RANK} into the operand rank hashtable.  */

static inline void
insert_operand_rank (tree e, int64_t rank)
{
  gcc_assert (rank > 0);
  gcc_assert (!operand_rank->put (e, rank));
}
/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1 = phi(x_0, x_2)
       b = a + x_1
       c = b + d
       x_2 = c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1 = phi(x_0, x_2)
       b = a + d
       c = b + e
       x_2 = c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      ssa_op_iter iter;
      gimple *stmt;
      int64_t rank;
      tree op;

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
        return rank;

      stmt = SSA_NAME_DEF_STMT (e);
      if (gimple_code (stmt) == GIMPLE_PHI)
        {
          rank = phi_rank (stmt);
          if (rank != bb_rank[gimple_bb (stmt)->index])
            bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
        }
      else if (!is_gimple_assign (stmt))
        rank = bb_rank[gimple_bb (stmt)->index];
      else
        {
          bool biased_p = false;
          bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;

          /* Otherwise, find the maximum rank for the operands.  As an
             exception, remove the bias from loop-carried phis when propagating
             the rank so that dependent operations are not also biased.  */
          /* Simply walk over all SSA uses - this takes advantage of the
             fact that non-SSA operands are is_gimple_min_invariant and
             thus have rank 0.  */
          rank = 0;
          FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
            rank = propagate_rank (rank, op, maybe_biased_p);

          rank += 1;
          if (biased_p)
            bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Rank for ");
          print_generic_expr (dump_file, e);
          fprintf (dump_file, " is %" PRId64 "\n", rank);
        }

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, rank);
      return rank;
    }

  /* Constants, globals, etc., are rank 0.  */
  return 0;
}
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1

/* Classify an invariant tree into integer, float, or other, so that
   we can sort them to be near other constants of the same type.  */

static inline int
constant_type (tree t)
{
  if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
    return INTEGER_CONST_TYPE;
  else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
    {
      /* Sort -1.0 and 1.0 constants last, while in some cases
         const_binop can't optimize some inexact operations, multiplication
         by -1.0 or 1.0 can be always merged with others.  */
      if (real_onep (t) || real_minus_onep (t))
        return FLOAT_ONE_CONST_TYPE;
      return FLOAT_CONST_TYPE;
    }
  else
    return OTHER_CONST_TYPE;
}
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.  */

static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry *oea = *(const operand_entry *const *)pa;
  const operand_entry *oeb = *(const operand_entry *const *)pb;

  if (oeb->rank != oea->rank)
    return oeb->rank > oea->rank ? 1 : -1;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied/whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oea->rank == 0)
    {
      if (constant_type (oeb->op) != constant_type (oea->op))
        return constant_type (oea->op) - constant_type (oeb->op);
      else
        /* To make sorting result stable, we use unique IDs to determine
           order.  */
        return oeb->id > oea->id ? 1 : -1;
    }

  if (TREE_CODE (oea->op) != SSA_NAME)
    {
      if (TREE_CODE (oeb->op) != SSA_NAME)
        return oeb->id > oea->id ? 1 : -1;
      else
        return 1;
    }
  else if (TREE_CODE (oeb->op) != SSA_NAME)
    return -1;

  /* Lastly, make sure the versions that are the same go next to each
     other.  */
  if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
    {
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
         versions of removed SSA_NAMEs, so if possible prefer to sort
         based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
      gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
      gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
      basic_block bba = gimple_bb (stmta);
      basic_block bbb = gimple_bb (stmtb);
      if (bbb != bba)
        {
          /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
             but the other might not.  */
          if (!bba)
            return 1;
          if (!bbb)
            return -1;
          /* If neither is, compare bb_rank.  */
          if (bb_rank[bbb->index] != bb_rank[bba->index])
            return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
        }

      bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
      bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
      if (da != db)
        return da ? -1 : 1;

      return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
    }

  return oeb->id > oea->id ? 1 : -1;
}
/* Add an operand entry to *OPS for the tree operand OP.  */

static void
add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = 1;
  oe->stmt_to_insert = stmt_to_insert;
  ops->safe_push (oe);
}
/* Add an operand entry to *OPS for the tree operand OP with repeat
   count REPEAT.  */

static void
add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
                       HOST_WIDE_INT repeat)
{
  operand_entry *oe = operand_entry_pool.allocate ();

  oe->op = op;
  oe->rank = get_rank (op);
  oe->id = next_operand_entry_id++;
  oe->count = repeat;
  oe->stmt_to_insert = NULL;
  ops->safe_push (oe);

  reassociate_stats.pows_encountered++;
}
/* Returns true if we can associate the SSA def OP.  */

static bool
can_reassociate_op_p (tree op)
{
  if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
    return false;
  /* Uninitialized variables can't participate in reassociation.  */
  if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op))
    return false;
  /* Make sure asm goto outputs do not participate in reassociation since
     we have no way to find an insertion place after asm goto.  */
  if (TREE_CODE (op) == SSA_NAME
      && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
      && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
    return false;
  return true;
}
/* Returns true if we can reassociate operations of TYPE.
   That is for integral or non-saturating fixed-point types, and for
   floating point type when associative-math is enabled.  */

static bool
can_reassociate_type_p (tree type)
{
  if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
      || NON_SAT_FIXED_POINT_TYPE_P (type)
      || (flag_associative_math && FLOAT_TYPE_P (type)))
    return true;
  return false;
}
/* Return true if STMT is a reassociable operation containing a binary
   operation with tree code CODE, and is inside LOOP.  */

static bool
is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
{
  basic_block bb = gimple_bb (stmt);

  if (gimple_bb (stmt) == NULL)
    return false;

  if (!flow_bb_inside_loop_p (loop, bb))
    return false;

  if (is_gimple_assign (stmt)
      && gimple_assign_rhs_code (stmt) == code
      && has_single_use (gimple_assign_lhs (stmt)))
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      tree rhs2 = gimple_assign_rhs2 (stmt);
      if (!can_reassociate_op_p (rhs1)
          || (rhs2 && !can_reassociate_op_p (rhs2)))
        return false;
      return true;
    }

  return false;
}
/* Return true if STMT is a nop-conversion.  */

static bool
gimple_nop_conversion_p (gimple *stmt)
{
  if (gassign *ass = dyn_cast <gassign *> (stmt))
    {
      if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
          && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
                                    TREE_TYPE (gimple_assign_rhs1 (ass))))
        return true;
    }
  return false;
}
/* Given NAME, if NAME is defined by a unary operation OPCODE, return the
   operand of the negate operation.  Otherwise, return NULL.  */

static tree
get_unary_op (tree name, enum tree_code opcode)
{
  gimple *stmt = SSA_NAME_DEF_STMT (name);

  /* Look through nop conversions (sign changes).  */
  if (gimple_nop_conversion_p (stmt)
      && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
    stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));

  if (!is_gimple_assign (stmt))
    return NULL_TREE;

  if (gimple_assign_rhs_code (stmt) == opcode)
    return gimple_assign_rhs1 (stmt);
  return NULL_TREE;
}
/* Return true if OP1 and OP2 have the same value if casted to either type.  */

static bool
ops_equal_values_p (tree op1, tree op2)
{
  if (op1 == op2)
    return true;

  tree orig_op1 = op1;
  if (TREE_CODE (op1) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op1);
      if (gimple_nop_conversion_p (stmt))
        {
          op1 = gimple_assign_rhs1 (stmt);
          if (op1 == op2)
            return true;
        }
    }

  if (TREE_CODE (op2) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (op2);
      if (gimple_nop_conversion_p (stmt))
        {
          op2 = gimple_assign_rhs1 (stmt);
          if (op2 == op1
              || op2 == orig_op1)
            return true;
        }
    }

  return false;
}
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
                          vec<operand_entry *> *ops,
                          bool *all_done,
                          unsigned int i,
                          operand_entry *curr,
                          operand_entry *last)
{
  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
        {
        case MAX_EXPR:
        case MIN_EXPR:
        case BIT_IOR_EXPR:
        case BIT_AND_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op);
              fprintf (dump_file, " [&|minmax] ");
              print_generic_expr (dump_file, last->op);
              fprintf (dump_file, " -> ");
              print_generic_stmt (dump_file, last->op);
            }

          ops->ordered_remove (i);
          reassociate_stats.ops_eliminated ++;

          return true;

        case BIT_XOR_EXPR:
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, curr->op);
              fprintf (dump_file, " ^ ");
              print_generic_expr (dump_file, last->op);
              fprintf (dump_file, " -> nothing\n");
            }

          reassociate_stats.ops_eliminated += 2;

          if (ops->length () == 2)
            {
              ops->truncate (0);
              add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
              *all_done = true;
            }
          else
            {
              ops->ordered_remove (i-1);
              ops->ordered_remove (i-1);
            }

          return true;

        default:
          break;
        }
    }
  return false;
}
static vec<tree> plus_negates;
/* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise not
   expression, look in OPS for a corresponding positive operation to cancel
   it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0 or -1, respectively, and return true.  Otherwise,
   return false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
                           vec<operand_entry *> *ops,
                           unsigned int currindex,
                           operand_entry *curr)
{
  tree negateop;
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (negateop == NULL_TREE && notop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */
  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (negateop
          && ops_equal_values_p (oe->op, negateop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, negateop);
              fprintf (dump_file, " + -");
              print_generic_expr (dump_file, oe->op);
              fprintf (dump_file, " -> 0\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
      else if (notop
               && ops_equal_values_p (oe->op, notop))
        {
          tree op_type = TREE_TYPE (oe->op);

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop);
              fprintf (dump_file, " + ~");
              print_generic_expr (dump_file, oe->op);
              fprintf (dump_file, " -> -1\n");
            }

          ops->ordered_remove (i);
          add_to_ops_vec (ops, build_all_ones_cst (op_type));
          ops->ordered_remove (currindex);
          reassociate_stats.ops_eliminated ++;

          return true;
        }
    }

  /* If CURR->OP is a negate expr without nop conversion in a plus expr:
     save it for later inspection in repropagate_negates().  */
  if (negateop != NULL_TREE
      && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
    plus_negates.safe_push (curr->op);

  return false;
}
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
                     vec<operand_entry *> *ops,
                     unsigned int currindex,
                     operand_entry *curr)
{
  tree notop;
  unsigned int i;
  operand_entry *oe;

  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */
  for (i = currindex + 1;
       ops->iterate (i, &oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "Equivalence: ");
              print_generic_expr (dump_file, notop);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " & ~");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " | ~");
              print_generic_expr (dump_file, oe->op);
              if (opcode == BIT_AND_EXPR)
                fprintf (dump_file, " -> 0\n");
              else if (opcode == BIT_IOR_EXPR)
                fprintf (dump_file, " -> -1\n");
            }

          if (opcode == BIT_AND_EXPR)
            oe->op = build_zero_cst (TREE_TYPE (oe->op));
          else if (opcode == BIT_IOR_EXPR)
            oe->op = build_all_ones_cst (TREE_TYPE (oe->op));

          reassociate_stats.ops_eliminated += ops->length () - 1;
          ops->truncate (0);
          ops->quick_push (oe);
          return true;
        }
    }

  return false;
}
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.  */
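/* Informally, the identities handled below are:
     x & 0 -> 0, x & -1 -> x, x | -1 -> -1, x | 0 -> x,
     x * 0 -> 0 (only when the FP semantics allow it), x * 1 -> x,
     and x [+|-|^] 0 -> x.
   (Illustrative summary only; see the switch below for the exact
   guards.)  */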
static void
eliminate_using_constants (enum tree_code opcode,
                           vec<operand_entry *> *ops)
{
  operand_entry *oelast = ops->last ();
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
        {
        case BIT_AND_EXPR:
          if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found & -1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case BIT_IOR_EXPR:
          if (integer_all_onesp (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | -1, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;

                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_zerop (oelast->op))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found | 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                }
            }
          break;
        case MULT_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && !HONOR_NANS (type)
                  && !HONOR_SIGNED_ZEROS (type)
                  && real_zerop (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 0, removing all other ops\n");

                  reassociate_stats.ops_eliminated += ops->length () - 1;
                  ops->truncate (0);
                  ops->quick_push (oelast);
                  return;
                }
            }
          else if (integer_onep (oelast->op)
                   || (FLOAT_TYPE_P (type)
                       && !HONOR_SNANS (type)
                       && real_onep (oelast->op)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found * 1, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        case BIT_XOR_EXPR:
        case PLUS_EXPR:
        case MINUS_EXPR:
          if (integer_zerop (oelast->op)
              || (FLOAT_TYPE_P (type)
                  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
                  && fold_real_zero_addition_p (type, 0, oelast->op,
                                                opcode == MINUS_EXPR)))
            {
              if (ops->length () != 1)
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fprintf (dump_file, "Found [|^+] 0, removing\n");
                  ops->pop ();
                  reassociate_stats.ops_eliminated++;
                  return;
                }
            }
          break;
        default:
          break;
        }
    }
}
static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
                                 bool, bool);

/* Structure for tracking and counting operands.  */
struct oecount {
  unsigned int cnt;
  unsigned int id;
  enum tree_code oecode;
  tree op;
};


/* The heap for the oecount hashtable and the sorted list of operands.  */
static vec<oecount> cvec;
/* Oecount hashtable helpers.  */

struct oecount_hasher : int_hash <int, 0, 1>
{
  static inline hashval_t hash (int);
  static inline bool equal (int, int);
};

/* Hash function for oecount.  */

inline hashval_t
oecount_hasher::hash (int p)
{
  const oecount *c = &cvec[p - 42];
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}

/* Comparison function for oecount.  */

inline bool
oecount_hasher::equal (int p1, int p2)
{
  const oecount *c1 = &cvec[p1 - 42];
  const oecount *c2 = &cvec[p2 - 42];
  return c1->oecode == c2->oecode && c1->op == c2->op;
}
/* Comparison function for qsort sorting oecount elements by count.  */

static int
oecount_cmp (const void *p1, const void *p2)
{
  const oecount *c1 = (const oecount *)p1;
  const oecount *c2 = (const oecount *)p2;
  if (c1->cnt != c2->cnt)
    return c1->cnt > c2->cnt ? 1 : -1;
  else
    /* If counts are identical, use unique IDs to stabilize qsort.  */
    return c1->id > c2->id ? 1 : -1;
}
/* Return TRUE iff STMT represents a builtin call that raises OP
   to some exponent.  */

static bool
stmt_is_power_of_op (gimple *stmt, tree op)
{
  if (!is_gimple_call (stmt))
    return false;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));

    default:
      return false;
    }
}
/* Given STMT which is a __builtin_pow* call, decrement its exponent
   in place and return the result.  Assumes that stmt_is_power_of_op
   was previously called for STMT and returned TRUE.  */
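/* For example (illustrative only): for a call __builtin_powi (x, 4) the
   exponent argument is rewritten in place to 3 and the value 3 is
   returned, so the caller knows one factor of x has been peeled off.  */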
static HOST_WIDE_INT
decrement_power (gimple *stmt)
{
  REAL_VALUE_TYPE c, cint;
  HOST_WIDE_INT power;
  tree arg1;

  switch (gimple_call_combined_fn (stmt))
    {
    CASE_CFN_POW:
      arg1 = gimple_call_arg (stmt, 1);
      c = TREE_REAL_CST (arg1);
      power = real_to_integer (&c) - 1;
      real_from_integer (&cint, VOIDmode, power, SIGNED);
      gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
      return power;

    CASE_CFN_POWI:
      arg1 = gimple_call_arg (stmt, 1);
      power = TREE_INT_CST_LOW (arg1) - 1;
      gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
      return power;

    default:
      gcc_unreachable ();
    }
}
/* Replace the SSA name defined by STMT with a new SSA name and replace
   all uses of the old name with the new one.  Also return the new SSA
   name.  */

static tree
make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
{
  gimple *use_stmt;
  use_operand_p use;
  imm_use_iterator iter;
  tree new_lhs, new_debug_lhs = NULL_TREE;
  tree lhs = gimple_get_lhs (stmt);

  new_lhs = make_ssa_name (TREE_TYPE (lhs));
  gimple_set_lhs (stmt, new_lhs);

  /* Also need to update GIMPLE_DEBUGs.  */
  FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
    {
      tree repl = new_lhs;
      if (is_gimple_debug (use_stmt))
        {
          if (new_debug_lhs == NULL_TREE)
            {
              new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
              gdebug *def_temp
                = gimple_build_debug_bind (new_debug_lhs,
                                           build2 (opcode, TREE_TYPE (lhs),
                                                   new_lhs, op),
                                           stmt);
              gimple_set_uid (def_temp, gimple_uid (stmt));
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
            }
          repl = new_debug_lhs;
        }
      FOR_EACH_IMM_USE_ON_STMT (use, iter)
        SET_USE (use, repl);
      update_stmt (use_stmt);
    }
  return new_lhs;
}
/* Replace all SSA names defined in STMTS_TO_FIX and replace their
   uses with new SSA names.  Also do this for the stmt that defines DEF
   if *DEF is not OP.  */

static void
make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
                           vec<gimple *> &stmts_to_fix)
{
  unsigned i;
  gimple *stmt;

  if (*def != op
      && TREE_CODE (*def) == SSA_NAME
      && (stmt = SSA_NAME_DEF_STMT (*def))
      && gimple_code (stmt) != GIMPLE_NOP)
    *def = make_new_ssa_for_def (stmt, opcode, op);

  FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
    make_new_ssa_for_def (stmt, opcode, op);
}
/* Find the single immediate use of STMT's LHS, and replace it
   with OP.  Remove STMT.  If STMT's LHS is the same as *DEF,
   replace *DEF with OP as well.  */

static void
propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
{
  tree lhs;
  gimple *use_stmt;
  use_operand_p use;
  gimple_stmt_iterator gsi;

  if (is_gimple_call (stmt))
    lhs = gimple_call_lhs (stmt);
  else
    lhs = gimple_assign_lhs (stmt);

  gcc_assert (has_single_use (lhs));
  single_imm_use (lhs, &use, &use_stmt);
  if (lhs == *def)
    *def = op;
  SET_USE (use, op);
  if (TREE_CODE (op) != SSA_NAME)
    update_stmt (use_stmt);
  gsi = gsi_for_stmt (stmt);
  unlink_stmt_vdef (stmt);
  reassoc_remove_stmt (&gsi);
  release_defs (stmt);
}
/* Walks the linear chain with result *DEF searching for an operation
   with operand OP and code OPCODE removing that from the chain.  *DEF
   is updated if there is only one operand but no operation left.  */

static void
zero_one_operation (tree *def, enum tree_code opcode, tree op)
{
  tree orig_def = *def;
  gimple *stmt = SSA_NAME_DEF_STMT (*def);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
  auto_vec<gimple *, 64> stmts_to_fix;

  do
    {
      tree name;

      if (opcode == MULT_EXPR)
        {
          if (stmt_is_power_of_op (stmt, op))
            {
              if (decrement_power (stmt) == 1)
                {
                  if (stmts_to_fix.length () > 0)
                    stmts_to_fix.pop ();
                  propagate_op_to_single_use (op, stmt, def);
                }
              break;
            }
          else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
            {
              if (gimple_assign_rhs1 (stmt) == op)
                {
                  tree cst = build_minus_one_cst (TREE_TYPE (op));
                  if (stmts_to_fix.length () > 0)
                    stmts_to_fix.pop ();
                  propagate_op_to_single_use (cst, stmt, def);
                  break;
                }
              else if (integer_minus_onep (op)
                       || real_minus_onep (op))
                {
                  gimple_assign_set_rhs_code
                    (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
                  break;
                }
            }
        }

      name = gimple_assign_rhs1 (stmt);

      /* If this is the operation we look for and one of the operands
         is ours simply propagate the other operand into the stmts
         single use.  */
      if (gimple_assign_rhs_code (stmt) == opcode
          && (name == op
              || gimple_assign_rhs2 (stmt) == op))
        {
          if (name == op)
            name = gimple_assign_rhs2 (stmt);
          if (stmts_to_fix.length () > 0)
            stmts_to_fix.pop ();
          propagate_op_to_single_use (name, stmt, def);
          break;
        }

      /* We might have a multiply of two __builtin_pow* calls, and
         the operand might be hiding in the rightmost one.  Likewise
         this can happen for a negate.  */
      if (opcode == MULT_EXPR
          && gimple_assign_rhs_code (stmt) == opcode
          && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
          && has_single_use (gimple_assign_rhs2 (stmt)))
        {
          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (stmt_is_power_of_op (stmt2, op))
            {
              if (decrement_power (stmt2) == 1)
                propagate_op_to_single_use (op, stmt2, def);
              else
                stmts_to_fix.safe_push (stmt2);
              break;
            }
          else if (is_gimple_assign (stmt2)
                   && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
            {
              if (gimple_assign_rhs1 (stmt2) == op)
                {
                  tree cst = build_minus_one_cst (TREE_TYPE (op));
                  propagate_op_to_single_use (cst, stmt2, def);
                  break;
                }
              else if (integer_minus_onep (op)
                       || real_minus_onep (op))
                {
                  stmts_to_fix.safe_push (stmt2);
                  gimple_assign_set_rhs_code
                    (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
                  break;
                }
            }
        }

      /* Continue walking the chain.  */
      gcc_assert (name != op
                  && TREE_CODE (name) == SSA_NAME);
      stmt = SSA_NAME_DEF_STMT (name);
      stmts_to_fix.safe_push (stmt);
    }
  while (1);

  if (stmts_to_fix.length () > 0 || *def == orig_def)
    make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
}
/* Returns true if statement S1 dominates statement S2.  Like
   stmt_dominates_stmt_p, but uses stmt UIDs to optimize.  */

static bool
reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
{
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);

  /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
     SSA_NAME.  Assume it lives at the beginning of function and
     thus dominates everything.  */
  if (!bb1 || s1 == s2)
    return true;

  /* If bb2 is NULL, it doesn't dominate any stmt with a bb.  */
  if (!bb2)
    return false;

  if (bb1 == bb2)
    {
      /* PHIs in the same basic block are assumed to be
         executed all in parallel, if only one stmt is a PHI,
         it dominates the other stmt in the same basic block.  */
      if (gimple_code (s1) == GIMPLE_PHI)
        return true;

      if (gimple_code (s2) == GIMPLE_PHI)
        return false;

      gcc_assert (gimple_uid (s1) && gimple_uid (s2));

      if (gimple_uid (s1) < gimple_uid (s2))
        return true;

      if (gimple_uid (s1) > gimple_uid (s2))
        return false;

      gimple_stmt_iterator gsi = gsi_for_stmt (s1);
      unsigned int uid = gimple_uid (s1);
      for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *s = gsi_stmt (gsi);
          if (gimple_uid (s) != uid)
            break;
          if (s == s2)
            return true;
        }

      return false;
    }

  return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
}
1508 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1510 gimple_stmt_iterator gsi
;
1513 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1514 bb
= gimple_bb (insert_point
);
1515 else if (!stmt_ends_bb_p (insert_point
))
1517 gsi
= gsi_for_stmt (insert_point
);
1518 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1519 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1522 else if (gimple_code (insert_point
) == GIMPLE_ASM
1523 && gimple_asm_nlabels (as_a
<gasm
*> (insert_point
)) != 0)
1524 /* We have no idea where to insert - it depends on where the
1525 uses will be placed. */
1528 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1529 thus if it must end a basic block, it should be a call that can
1530 throw, or some assignment that can throw. If it throws, the LHS
1531 of it will not be initialized though, so only valid places using
1532 the SSA_NAME should be dominated by the fallthru edge. */
1533 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1534 gsi
= gsi_after_labels (bb
);
1535 if (gsi_end_p (gsi
))
1537 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1538 gimple_set_uid (stmt
,
1539 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1542 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1543 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple *
build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
{
  gimple *op1def = NULL, *op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gassign *sum;

  /* Create the addition statement.  */
  op = make_ssa_name (type);
  sum = gimple_build_assign (op, opcode, op1, op2);

  /* Find an insertion place and insert.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
      if (!gsi_end_p (gsi)
          && is_gimple_call (gsi_stmt (gsi))
          && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
        {
          /* Don't add statements before a returns_twice call at the start
             of a function.  */
          split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
        }
      if (gsi_end_p (gsi))
        {
          gimple_stmt_iterator gsi2
            = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
          gimple_set_uid (sum,
                          gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
        }
      else
        gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else
    {
      gimple *insert_point;
      if ((!op1def || gimple_nop_p (op1def))
          || (op2def && !gimple_nop_p (op2def)
              && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
        insert_point = op2def;
      else
        insert_point = op1def;
      insert_stmt_after (sum, insert_point);
    }

  return sum;
}
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

    - First we walk the addition chain *OPS looking for summands that
      are defined by a multiplication or a real division.  This results
      in the candidates bitmap with relevant indices into *OPS.

    - Second we build the chains of multiplications or divisions for
      these candidates, counting the number of occurrences of (operand, code)
      pairs in all of the candidates chains.

    - Third we sort the (operand, code) pairs by number of occurrence and
      process them starting with the pair with the most uses.

      * For each such pair we walk the candidates again to build a
        second candidate bitmap noting all multiplication/division chains
        that have at least one occurrence of (operand, code).

      * We build an alternate addition chain only covering these
        candidates with one (operand, code) operation removed from their
        multiplication/division chain.

      * The first candidate gets replaced by the alternate addition chain
        multiplied/divided by the operand.

      * All candidate chains get disabled for further processing and
        processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
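/* A concrete sketch of the above (illustrative only): in the addition
   chain a * x + b * x + c the pair (x, MULT_EXPR) occurs twice, so both
   candidate chains drop one factor of x, an alternate addition chain
   (a + b) is built, and the first candidate is replaced by (a + b) * x,
   giving (a + b) * x + c overall.  */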
1643 undistribute_ops_list (enum tree_code opcode
,
1644 vec
<operand_entry
*> *ops
, class loop
*loop
)
1646 unsigned int length
= ops
->length ();
1649 unsigned nr_candidates
, nr_candidates2
;
1650 sbitmap_iterator sbi0
;
1651 vec
<operand_entry
*> *subops
;
1652 bool changed
= false;
1653 unsigned int next_oecount_id
= 0;
1656 || opcode
!= PLUS_EXPR
)
1659 /* Build a list of candidates to process. */
1660 auto_sbitmap
candidates (length
);
1661 bitmap_clear (candidates
);
1663 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1665 enum tree_code dcode
;
1668 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1670 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1671 if (!is_gimple_assign (oe1def
))
1673 dcode
= gimple_assign_rhs_code (oe1def
);
1674 if ((dcode
!= MULT_EXPR
1675 && dcode
!= RDIV_EXPR
)
1676 || !is_reassociable_op (oe1def
, dcode
, loop
))
1679 bitmap_set_bit (candidates
, i
);
1683 if (nr_candidates
< 2)
1686 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1688 fprintf (dump_file
, "searching for un-distribute opportunities ");
1689 print_generic_expr (dump_file
,
1690 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, TDF_NONE
);
1691 fprintf (dump_file
, " %d\n", nr_candidates
);
1694 /* Build linearized sub-operand lists and the counting table. */
1697 hash_table
<oecount_hasher
> ctable (15);
1699 /* ??? Macro arguments cannot have multi-argument template types in
1700 them. This typedef is needed to workaround that limitation. */
1701 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1702 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1703 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1706 enum tree_code oecode
;
1709 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1710 oecode
= gimple_assign_rhs_code (oedef
);
1711 linearize_expr_tree (&subops
[i
], oedef
,
1712 associative_tree_code (oecode
), false);
1714 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1721 c
.id
= next_oecount_id
++;
1724 idx
= cvec
.length () + 41;
1725 slot
= ctable
.find_slot (idx
, INSERT
);
1733 cvec
[*slot
- 42].cnt
++;
1738 /* Sort the counting table. */
1739 cvec
.qsort (oecount_cmp
);
1741 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1744 fprintf (dump_file
, "Candidates:\n");
1745 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1747 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1748 c
->oecode
== MULT_EXPR
1749 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1750 print_generic_expr (dump_file
, c
->op
);
1751 fprintf (dump_file
, "\n");
1755 /* Process the (operand, code) pairs in order of most occurrence. */
1756 auto_sbitmap
candidates2 (length
);
1757 while (!cvec
.is_empty ())
1759 oecount
*c
= &cvec
.last ();
1763 /* Now collect the operands in the outer chain that contain
1764 the common operand in their inner chain. */
1765 bitmap_clear (candidates2
);
1767 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1770 enum tree_code oecode
;
1772 tree op
= (*ops
)[i
]->op
;
1774 /* If we undistributed in this chain already this may be
1776 if (TREE_CODE (op
) != SSA_NAME
)
1779 oedef
= SSA_NAME_DEF_STMT (op
);
1780 oecode
= gimple_assign_rhs_code (oedef
);
1781 if (oecode
!= c
->oecode
)
1784 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1786 if (oe1
->op
== c
->op
)
1788 bitmap_set_bit (candidates2
, i
);
1795 if (nr_candidates2
>= 2)
1797 operand_entry
*oe1
, *oe2
;
1799 int first
= bitmap_first_set_bit (candidates2
);
1801 /* Build the new addition chain. */
1802 oe1
= (*ops
)[first
];
1803 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1805 fprintf (dump_file
, "Building (");
1806 print_generic_expr (dump_file
, oe1
->op
);
1808 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1809 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1813 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1815 fprintf (dump_file
, " + ");
1816 print_generic_expr (dump_file
, oe2
->op
);
1818 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1819 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1820 oe1
->op
, oe2
->op
, opcode
);
1821 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1823 oe1
->op
= gimple_get_lhs (sum
);
1826 /* Apply the multiplication/division. */
1827 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1828 oe1
->op
, c
->op
, c
->oecode
);
1829 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1831 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1832 print_generic_expr (dump_file
, c
->op
);
1833 fprintf (dump_file
, "\n");
1836 /* Record it in the addition chain and disable further
1837 undistribution with this op. */
1838 oe1
->op
= gimple_assign_lhs (prod
);
1839 oe1
->rank
= get_rank (oe1
->op
);
1840 subops
[first
].release ();
1848 for (i
= 0; i
< ops
->length (); ++i
)
1849 subops
[i
].release ();
/* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
     first: element index for each relevant BIT_FIELD_REF.
     second: the index of vec ops* for each relevant BIT_FIELD_REF.  */
typedef std::pair<unsigned, unsigned> v_info_elem;
struct v_info {
  tree vec_type;
  auto_vec<v_info_elem, 32> vec;
};
typedef v_info *v_info_ptr;
/* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode.  */

static int
sort_by_mach_mode (const void *p_i, const void *p_j)
{
  const tree tr1 = *((const tree *) p_i);
  const tree tr2 = *((const tree *) p_j);
  unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
  unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
  if (mode1 > mode2)
    return 1;
  else if (mode1 < mode2)
    return -1;
  if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
    return -1;
  else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
    return 1;
  return 0;
}
/* Cleanup hash map for VECTOR information.  */

static void
cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
{
  for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
       it != info_map.end (); ++it)
    {
      v_info_ptr info = (*it).second;
      delete info;
      (*it).second = NULL;
    }
}
/* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
     V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
   is transformed to
     Vs = (V1 + V2 + ... + Vn)
     Vs[0] + Vs[1] + ... + Vs[k]

   The basic steps are listed below:

    1) Check the addition chain *OPS by looking those summands coming from
       VECTOR bit_field_ref on VECTOR type.  Put the information into
       v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.

    2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
       continuous, they can cover the whole VECTOR perfectly without any holes.
       Obtain one VECTOR list which contain candidates to be transformed.

    3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
       candidates with same mode, build the addition statements for them and
       generate BIT_FIELD_REFs accordingly.

   TODO:
       The current implementation requires the whole VECTORs should be fully
       covered, but it can be extended to support partial, checking adjacent
       but not fill the whole, it may need some cost model to define the
       boundary to do or not.  */
1925 undistribute_bitref_for_vector (enum tree_code opcode
,
1926 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1928 if (ops
->length () <= 1)
1931 if (opcode
!= PLUS_EXPR
1932 && opcode
!= MULT_EXPR
1933 && opcode
!= BIT_XOR_EXPR
1934 && opcode
!= BIT_IOR_EXPR
1935 && opcode
!= BIT_AND_EXPR
)
1938 hash_map
<tree
, v_info_ptr
> v_info_map
;
1942 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1943 information into map. */
1944 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1946 enum tree_code dcode
;
1949 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1951 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1952 if (!is_gimple_assign (oe1def
))
1954 dcode
= gimple_assign_rhs_code (oe1def
);
1955 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1958 tree rhs
= gimple_assign_rhs1 (oe1def
);
1959 tree vec
= TREE_OPERAND (rhs
, 0);
1960 tree vec_type
= TREE_TYPE (vec
);
1962 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1965 /* Ignore it if target machine can't support this VECTOR type. */
1966 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1969 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1970 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1973 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1974 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1977 /* The type of BIT_FIELD_REF might not be equal to the element type of
1978 the vector. We want to use a vector type with element type the
1979 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1980 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1982 machine_mode simd_mode
;
1983 unsigned HOST_WIDE_INT size
, nunits
;
1984 unsigned HOST_WIDE_INT elem_size
1985 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1986 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1988 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1990 nunits
= size
/ elem_size
;
1991 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1992 nunits
).exists (&simd_mode
))
1994 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1996 /* Ignore it if target machine can't support this VECTOR type. */
1997 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
2000 /* Check const vector type, constrain BIT_FIELD_REF offset and
2002 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
2005 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
2006 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
2010 tree elem_type
= TREE_TYPE (vec_type
);
2011 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
2012 if (maybe_ne (bit_field_size (rhs
), elem_size
))
2016 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
2019 /* Ignore it if target machine can't support this type of VECTOR
2021 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
2022 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
2026 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
2030 info
->vec_type
= vec_type
;
2032 else if (!types_compatible_p (vec_type
, info
->vec_type
))
2034 info
->vec
.safe_push (std::make_pair (idx
, i
));
2037 /* At least two VECTOR to combine. */
2038 if (v_info_map
.elements () <= 1)
2040 cleanup_vinfo_map (v_info_map
);
2044 /* Verify all VECTOR candidates by checking two conditions:
2045 1) sorted offsets are adjacent, no holes.
2046 2) can fill the whole VECTOR perfectly.
2047 And add the valid candidates to a vector for further handling. */
2048 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
2049 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
2050 it
!= v_info_map
.end (); ++it
)
2052 tree cand_vec
= (*it
).first
;
2053 v_info_ptr cand_info
= (*it
).second
;
2054 unsigned int num_elems
2055 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
2056 if (cand_info
->vec
.length () != num_elems
)
2058 sbitmap holes
= sbitmap_alloc (num_elems
);
2059 bitmap_ones (holes
);
2062 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr
)
2064 if (!bitmap_bit_p (holes
, curr
->first
))
2070 bitmap_clear_bit (holes
, curr
->first
);
2072 if (valid
&& bitmap_empty_p (holes
))
2073 valid_vecs
.quick_push (cand_vec
);
2074 sbitmap_free (holes
);
2077 /* At least two VECTOR to combine. */
2078 if (valid_vecs
.length () <= 1)
2080 cleanup_vinfo_map (v_info_map
);
2084 valid_vecs
.qsort (sort_by_mach_mode
);
2085 /* Go through all candidates by machine mode order, query the mode_to_total
2086 to get the total number for each mode and skip the single one. */
2087 for (unsigned i
= 0; i
< valid_vecs
.length () - 1; ++i
)
2089 tree tvec
= valid_vecs
[i
];
2090 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (tvec
));
2092 /* Skip modes with only a single candidate. */
2093 if (TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) != mode
)
2096 unsigned int idx
, j
;
2098 tree sum_vec
= tvec
;
2099 v_info_ptr info_ptr
= *(v_info_map
.get (tvec
));
2101 tree vec_type
= info_ptr
->vec_type
;
2103 /* Build the sum for all candidates with same mode. */
2106 sum
= build_and_add_sum (vec_type
, sum_vec
,
2107 valid_vecs
[i
+ 1], opcode
);
2108 /* Update the operands only after build_and_add_sum,
2109 so that we don't have to repeat the placement algorithm
2110 of build_and_add_sum. */
2112 && !useless_type_conversion_p (vec_type
, TREE_TYPE (sum_vec
)))
2114 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2115 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
, sum_vec
);
2116 tree lhs
= make_ssa_name (vec_type
);
2117 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2118 gimple_set_uid (g
, gimple_uid (sum
));
2119 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2120 gimple_assign_set_rhs1 (sum
, lhs
);
2123 if (!useless_type_conversion_p (vec_type
,
2124 TREE_TYPE (valid_vecs
[i
+ 1])))
2126 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2127 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
,
2129 tree lhs
= make_ssa_name (vec_type
);
2130 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2131 gimple_set_uid (g
, gimple_uid (sum
));
2132 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2133 gimple_assign_set_rhs2 (sum
, lhs
);
2136 sum_vec
= gimple_get_lhs (sum
);
2137 info_ptr
= *(v_info_map
.get (valid_vecs
[i
+ 1]));
2138 gcc_assert (types_compatible_p (vec_type
, info_ptr
->vec_type
));
2139 /* Update those related ops of current candidate VECTOR. */
2140 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2143 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2144 /* Set this then op definition will get DCEd later. */
2145 gimple_set_visited (def
, true);
2146 if (opcode
== PLUS_EXPR
2147 || opcode
== BIT_XOR_EXPR
2148 || opcode
== BIT_IOR_EXPR
)
2149 (*ops
)[idx
]->op
= build_zero_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2150 else if (opcode
== MULT_EXPR
)
2151 (*ops
)[idx
]->op
= build_one_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2154 gcc_assert (opcode
== BIT_AND_EXPR
);
2156 = build_all_ones_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2158 (*ops
)[idx
]->rank
= 0;
2160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2162 fprintf (dump_file
, "Generating addition -> ");
2163 print_gimple_stmt (dump_file
, sum
, 0);
2167 while ((i
< valid_vecs
.length () - 1)
2168 && TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) == mode
);
2170 /* Referring to first valid VECTOR with this mode, generate the
2171 BIT_FIELD_REF statements accordingly. */
2172 info_ptr
= *(v_info_map
.get (tvec
));
2174 tree elem_type
= TREE_TYPE (vec_type
);
2175 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2178 tree dst
= make_ssa_name (elem_type
);
2179 tree pos
= bitsize_int (elem
->first
2180 * tree_to_uhwi (TYPE_SIZE (elem_type
)));
2181 tree bfr
= build3 (BIT_FIELD_REF
, elem_type
, sum_vec
,
2182 TYPE_SIZE (elem_type
), pos
);
2183 gimple
*gs
= gimple_build_assign (dst
, BIT_FIELD_REF
, bfr
);
2184 insert_stmt_after (gs
, sum
);
2185 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
	  /* Mark the def visited so that it will get DCEd later.  */
2187 gimple_set_visited (def
, true);
2188 (*ops
)[idx
]->op
= gimple_assign_lhs (gs
);
2189 (*ops
)[idx
]->rank
= get_rank ((*ops
)[idx
]->op
);
2190 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2192 fprintf (dump_file
, "Generating bit_field_ref -> ");
2193 print_gimple_stmt (dump_file
, gs
, 0);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2201 cleanup_vinfo_map (v_info_map
);
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
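/* A concrete worked instance of the above (illustrative names t1/t2 are
   made up, not taken from this file): with OPCODE == BIT_IOR_EXPR and an
   operand list containing the results of

     t1 = a < b;
     t2 = a == b;

   maybe_fold_or_comparisons can fold the pair into the single comparison
   a <= b, so the entry for t2 is removed from OPS and the entry for t1 is
   replaced by the combined test.  */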
2212 eliminate_redundant_comparison (enum tree_code opcode
,
2213 vec
<operand_entry
*> *ops
,
2214 unsigned int currindex
,
2215 operand_entry
*curr
)
2218 enum tree_code lcode
, rcode
;
2219 gimple
*def1
, *def2
;
2223 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
  /* Check that CURR is a comparison.  */
2227 if (TREE_CODE (curr
->op
) != SSA_NAME
)
2229 def1
= SSA_NAME_DEF_STMT (curr
->op
);
2230 if (!is_gimple_assign (def1
))
2232 lcode
= gimple_assign_rhs_code (def1
);
2233 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
2235 op1
= gimple_assign_rhs1 (def1
);
2236 op2
= gimple_assign_rhs2 (def1
);
  /* Now look for a similar comparison in the remaining OPS.  */
2239 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
2243 if (TREE_CODE (oe
->op
) != SSA_NAME
)
2245 def2
= SSA_NAME_DEF_STMT (oe
->op
);
2246 if (!is_gimple_assign (def2
))
2248 rcode
= gimple_assign_rhs_code (def2
);
2249 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
2254 tree type
= TREE_TYPE (gimple_assign_lhs (def1
));
2255 if (opcode
== BIT_IOR_EXPR
)
2256 t
= maybe_fold_or_comparisons (type
,
2258 rcode
, gimple_assign_rhs1 (def2
),
2259 gimple_assign_rhs2 (def2
));
2261 t
= maybe_fold_and_comparisons (type
,
2263 rcode
, gimple_assign_rhs1 (def2
),
2264 gimple_assign_rhs2 (def2
));
      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
2272 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
2274 if (!fold_convertible_p (TREE_TYPE (curr
->op
), t
))
2276 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
2279 if (TREE_CODE (t
) != INTEGER_CST
2280 && !operand_equal_p (t
, curr
->op
, 0))
2282 enum tree_code subcode
;
2283 tree newop1
, newop2
;
2284 if (!COMPARISON_CLASS_P (t
))
2286 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2287 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2288 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2289 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
2291 if (lcode
== TREE_CODE (t
)
2292 && operand_equal_p (op1
, newop1
, 0)
2293 && operand_equal_p (op2
, newop2
, 0))
2295 else if ((TREE_CODE (newop1
) == SSA_NAME
2296 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1
))
2297 || (TREE_CODE (newop2
) == SSA_NAME
2298 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2
)))
2302 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2304 fprintf (dump_file
, "Equivalence: ");
2305 print_generic_expr (dump_file
, curr
->op
);
2306 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
2307 print_generic_expr (dump_file
, oe
->op
);
2308 fprintf (dump_file
, " -> ");
2309 print_generic_expr (dump_file
, t
);
2310 fprintf (dump_file
, "\n");
      /* Now we can delete oe, as it has been subsumed by the new combined
	 expression t.  */
2315 ops
->ordered_remove (i
);
2316 reassociate_stats
.ops_eliminated
++;
      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
2322 if (TREE_CODE (t
) == INTEGER_CST
)
2324 ops
->ordered_remove (currindex
);
2325 add_to_ops_vec (ops
, t
);
2327 else if (!operand_equal_p (t
, curr
->op
, 0))
2330 enum tree_code subcode
;
2333 gcc_assert (COMPARISON_CLASS_P (t
));
2334 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2335 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2336 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2337 gcc_checking_assert (is_gimple_val (newop1
)
2338 && is_gimple_val (newop2
));
2339 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
2340 curr
->op
= gimple_get_lhs (sum
);
/* Transform repeated addition of same values into multiply with
   constant.  */
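/* Illustrative example (made-up operands): for an operand list representing
   x + x + x + y, the three repeated x entries are removed and replaced by a
   single new entry

     tmp = x * 3;

   so the list becomes tmp + y.  For floating-point operands the code below
   only does this under -funsafe-math-optimizations.  */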
2352 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
2355 tree op
= NULL_TREE
;
2357 int i
, start
= -1, end
= 0, count
= 0;
2358 auto_vec
<std::pair
<int, int> > indxs
;
2359 bool changed
= false;
2361 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2362 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2363 || !flag_unsafe_math_optimizations
))
  /* Look for repeated operands.  */
2367 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2375 else if (operand_equal_p (oe
->op
, op
, 0))
2383 indxs
.safe_push (std::make_pair (start
, end
));
2391 indxs
.safe_push (std::make_pair (start
, end
));
2393 for (j
= indxs
.length () - 1; j
>= 0; --j
)
      /* Convert repeated operand addition to multiplication.  */
2396 start
= indxs
[j
].first
;
2397 end
= indxs
[j
].second
;
2398 op
= (*ops
)[start
]->op
;
2399 count
= end
- start
+ 1;
2400 for (i
= end
; i
>= start
; --i
)
2401 ops
->unordered_remove (i
);
2402 tree tmp
= make_ssa_name (TREE_TYPE (op
));
2403 tree cst
= build_int_cst (integer_type_node
, count
);
2405 = gimple_build_assign (tmp
, MULT_EXPR
,
2406 op
, fold_convert (TREE_TYPE (op
), cst
));
2407 gimple_set_visited (mul_stmt
, true);
2408 add_to_ops_vec (ops
, tmp
, mul_stmt
);
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */
2421 optimize_ops_list (enum tree_code opcode
,
2422 vec
<operand_entry
*> *ops
)
2424 unsigned int length
= ops
->length ();
2427 operand_entry
*oelast
= NULL
;
2428 bool iterate
= false;
2433 oelast
= ops
->last ();
  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  */
2437 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
2439 operand_entry
*oelm1
= (*ops
)[length
- 2];
2441 if (oelm1
->rank
== 0
2442 && is_gimple_min_invariant (oelm1
->op
)
2443 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
2444 TREE_TYPE (oelast
->op
)))
2446 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2447 oelm1
->op
, oelast
->op
);
2449 if (folded
&& is_gimple_min_invariant (folded
))
2451 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2452 fprintf (dump_file
, "Merging constants\n");
2457 add_to_ops_vec (ops
, folded
);
2458 reassociate_stats
.constants_eliminated
++;
2460 optimize_ops_list (opcode
, ops
);
2466 eliminate_using_constants (opcode
, ops
);
2469 for (i
= 0; ops
->iterate (i
, &oe
);)
2473 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2475 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2476 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2477 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2490 optimize_ops_list (opcode
, ops
);
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example,
	X == 2 || X == 5 || X == 3 || X == 4
   is converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.cc,
   this implementation is for GIMPLE.  */
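/* Restating the example above with the arithmetic spelled out: the four
   equality tests on X cover exactly the contiguous range [2, 5], so they
   can be replaced by the single range check

     (unsigned) (X - 2) <= 3

   i.e. one subtraction and one unsigned comparison instead of four
   comparisons.  */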
/* Dump the range entry R to FILE, skipping its expression if SKIP_EXP.  */
void
dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
{
  if (!skip_exp)
    print_generic_expr (file, r->exp);
  fprintf (file, " %c[", r->in_p ? '+' : '-');
  print_generic_expr (file, r->low);
  fprintf (file, ", ");
  print_generic_expr (file, r->high);
  fprintf (file, "]");
}
/* Dump the range entry R to STDERR.  */

DEBUG_FUNCTION void
debug_range_entry (struct range_entry *r)
{
  dump_range_entry (stderr, r, false);
  fputc ('\n', stderr);
}
/* This is similar to make_range in fold-const.cc, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */
2538 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2542 bool is_bool
, strict_overflow_p
;
2546 r
->strict_overflow_p
= false;
2548 r
->high
= NULL_TREE
;
2549 if (exp
!= NULL_TREE
2550 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
  /* Start with simply saying "EXP != 0" and then look at the code of EXP
     and see if we can refine the range.  Some of the cases below may not
     happen, but it doesn't seem worth worrying about this.  We "continue"
     the outer loop when we've changed something; otherwise we "break"
     the switch, which will "break" the while.  */
2558 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2561 strict_overflow_p
= false;
2563 if (exp
== NULL_TREE
)
2565 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2567 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2572 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2577 enum tree_code code
;
2578 tree arg0
, arg1
, exp_type
;
2582 if (exp
!= NULL_TREE
)
2584 if (TREE_CODE (exp
) != SSA_NAME
2585 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2588 stmt
= SSA_NAME_DEF_STMT (exp
);
2589 if (!is_gimple_assign (stmt
))
2592 code
= gimple_assign_rhs_code (stmt
);
2593 arg0
= gimple_assign_rhs1 (stmt
);
2594 arg1
= gimple_assign_rhs2 (stmt
);
2595 exp_type
= TREE_TYPE (exp
);
2599 code
= gimple_cond_code (stmt
);
2600 arg0
= gimple_cond_lhs (stmt
);
2601 arg1
= gimple_cond_rhs (stmt
);
2602 exp_type
= boolean_type_node
;
2605 if (TREE_CODE (arg0
) != SSA_NAME
2606 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
)
2607 || ssa_name_maybe_undef_p (arg0
))
2609 loc
= gimple_location (stmt
);
2613 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
	      /* Ensure the range is either +[-,0], +[0,0],
		 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
		 -[1,1].  If it is e.g. +[-,-] or -[-,-]
		 or similar expression of unconditional true or
		 false, it should not be negated.  */
2619 && ((high
&& integer_zerop (high
))
2620 || (low
&& integer_onep (low
))))
2633 if ((TYPE_PRECISION (exp_type
) == 1
2634 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2635 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2638 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2640 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2645 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2660 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2662 &strict_overflow_p
);
2663 if (nexp
!= NULL_TREE
)
2666 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2679 r
->strict_overflow_p
= strict_overflow_p
;
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   means maximum.  */
2691 range_entry_cmp (const void *a
, const void *b
)
2693 const struct range_entry
*p
= (const struct range_entry
*) a
;
2694 const struct range_entry
*q
= (const struct range_entry
*) b
;
2696 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2698 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
      /* Group range_entries for the same SSA_NAME together.  */
2701 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2703 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
      /* If ->low is different, NULL low goes first, then by
	 ascending ->low.  */
2707 if (p
->low
!= NULL_TREE
)
2709 if (q
->low
!= NULL_TREE
)
2711 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2713 if (tem
&& integer_onep (tem
))
2715 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2717 if (tem
&& integer_onep (tem
))
2723 else if (q
->low
!= NULL_TREE
)
      /* If ->high is different, NULL high goes last, before that by
	 ascending ->high.  */
2727 if (p
->high
!= NULL_TREE
)
2729 if (q
->high
!= NULL_TREE
)
2731 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2733 if (tem
&& integer_onep (tem
))
2735 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2737 if (tem
&& integer_onep (tem
))
2743 else if (q
->high
!= NULL_TREE
)
      /* If both ranges are the same, sort below by ascending idx.  */
2750 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2753 if (p
->idx
< q
->idx
)
2757 gcc_checking_assert (p
->idx
> q
->idx
);
/* Helper function for update_range_test.  Force EXPR into an SSA_NAME,
   inserting any needed statements before or after GSI, as selected by
   BEFORE.  */
static tree
force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
{
  enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
  tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
  if (TREE_CODE (ret) != SSA_NAME)
    {
      gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
      if (before)
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
      else
	gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
      ret = gimple_assign_lhs (g);
    }
  return ret;
}
/* Helper routine of optimize_range_tests.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
2795 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2796 struct range_entry
**otherrangep
,
2797 unsigned int count
, enum tree_code opcode
,
2798 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2799 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2801 unsigned int idx
= range
->idx
;
2802 struct range_entry
*swap_with
= NULL
;
2803 basic_block rewrite_bb_first
= NULL
, rewrite_bb_last
= NULL
;
2804 if (opcode
== ERROR_MARK
)
      /* For inter-bb range test optimization, pick from the range tests
	 the one which is tested in the earliest condition (one dominating
	 the others), because otherwise there could be some UB (e.g. signed
	 overflow) in following bbs that we'd expose which wasn't there in
	 the original program.  See PR104196.  */
2811 basic_block orig_range_bb
= BASIC_BLOCK_FOR_FN (cfun
, (*ops
)[idx
]->id
);
2812 basic_block range_bb
= orig_range_bb
;
2813 for (unsigned int i
= 0; i
< count
; i
++)
2815 struct range_entry
*this_range
;
2817 this_range
= otherrange
+ i
;
2819 this_range
= otherrangep
[i
];
2820 operand_entry
*oe
= (*ops
)[this_range
->idx
];
2821 basic_block this_bb
= BASIC_BLOCK_FOR_FN (cfun
, oe
->id
);
2822 if (range_bb
!= this_bb
2823 && dominated_by_p (CDI_DOMINATORS
, range_bb
, this_bb
))
2825 swap_with
= this_range
;
2827 idx
= this_range
->idx
;
      /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
	 only defined in later blocks.  In this case we can't move the
	 merged comparison earlier, so instead check if there are any stmts
	 that might trigger signed integer overflow in between and rewrite
	 them.  But only after we check if the optimization is possible.  */
2835 if (seq
&& swap_with
)
2837 rewrite_bb_first
= range_bb
;
2838 rewrite_bb_last
= orig_range_bb
;
2843 operand_entry
*oe
= (*ops
)[idx
];
2845 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2846 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2847 location_t loc
= gimple_location (stmt
);
2848 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2849 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2851 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2852 gimple_stmt_iterator gsi
;
2853 unsigned int i
, uid
;
2855 if (tem
== NULL_TREE
)
  /* If op is default def SSA_NAME, there is no place to insert the
     new comparison.  Give up, unless we can use OP itself as the
     range test.  */
2861 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2863 if (op
== range
->exp
2864 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2865 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2867 || (TREE_CODE (tem
) == EQ_EXPR
2868 && TREE_OPERAND (tem
, 0) == op
2869 && integer_onep (TREE_OPERAND (tem
, 1))))
2870 && opcode
!= BIT_IOR_EXPR
2871 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2881 std::swap (range
->idx
, swap_with
->idx
);
2883 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2884 warning_at (loc
, OPT_Wstrict_overflow
,
2885 "assuming signed overflow does not occur "
2886 "when simplifying range test");
2888 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2890 struct range_entry
*r
;
2891 fprintf (dump_file
, "Optimizing range tests ");
2892 dump_range_entry (dump_file
, range
, false);
2893 for (i
= 0; i
< count
; i
++)
2900 && r
->exp
!= range
->exp
2901 && TREE_CODE (r
->exp
) == SSA_NAME
)
2903 fprintf (dump_file
, " and ");
2904 dump_range_entry (dump_file
, r
, false);
2908 fprintf (dump_file
, " and");
2909 dump_range_entry (dump_file
, r
, true);
2912 fprintf (dump_file
, "\n into ");
2913 print_generic_expr (dump_file
, tem
);
2914 fprintf (dump_file
, "\n");
  /* In inter-bb range optimization mode, if we have a seq, we can't
     move the merged comparison to the earliest bb from the comparisons
     being replaced, so instead rewrite stmts that could trigger signed
     integer overflow.  */
2921 for (basic_block bb
= rewrite_bb_last
;
2922 bb
!= rewrite_bb_first
; bb
= single_pred (bb
))
2923 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
2924 !gsi_end_p (gsi
); gsi_next (&gsi
))
2926 gimple
*stmt
= gsi_stmt (gsi
);
2927 if (is_gimple_assign (stmt
))
2928 if (tree lhs
= gimple_assign_lhs (stmt
))
2929 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2930 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
2931 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
)))
2933 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2934 if (arith_code_with_undefined_signed_overflow (code
))
2936 gimple_stmt_iterator gsip
= gsi
;
2937 gimple_stmt_iterator gsin
= gsi
;
2940 rewrite_to_defined_overflow (&gsi
);
2941 unsigned uid
= gimple_uid (stmt
);
2942 if (gsi_end_p (gsip
))
2943 gsip
= gsi_after_labels (bb
);
2946 for (; gsi_stmt (gsip
) != gsi_stmt (gsin
);
2948 gimple_set_uid (gsi_stmt (gsip
), uid
);
2953 if (opcode
== BIT_IOR_EXPR
2954 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2955 tem
= invert_truthvalue_loc (loc
, tem
);
2957 tem
= fold_convert_loc (loc
, optype
, tem
);
2960 gsi
= gsi_for_stmt (stmt
);
2961 uid
= gimple_uid (stmt
);
2969 gcc_checking_assert (tem
== op
);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
2973 else if (op
!= range
->exp
)
2975 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2976 tem
= force_into_ssa_name (&gsi
, tem
, true);
2979 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2981 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2982 tem
= force_into_ssa_name (&gsi
, tem
, false);
2986 gsi
= gsi_after_labels (gimple_bb (stmt
));
2987 if (!gsi_end_p (gsi
))
2988 uid
= gimple_uid (gsi_stmt (gsi
));
2991 gsi
= gsi_start_bb (gimple_bb (stmt
));
2993 while (!gsi_end_p (gsi
))
2995 uid
= gimple_uid (gsi_stmt (gsi
));
2999 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
3000 tem
= force_into_ssa_name (&gsi
, tem
, true);
3001 if (gsi_end_p (gsi
))
3002 gsi
= gsi_last_bb (gimple_bb (stmt
));
3006 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
3007 if (gimple_uid (gsi_stmt (gsi
)))
3010 gimple_set_uid (gsi_stmt (gsi
), uid
);
3017 range
->strict_overflow_p
= false;
3019 for (i
= 0; i
< count
; i
++)
3022 range
= otherrange
+ i
;
3024 range
= otherrangep
[i
];
3025 oe
= (*ops
)[range
->idx
];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
3028 if (opcode
== ERROR_MARK
)
3031 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3032 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3034 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3035 ? boolean_false_node
: boolean_true_node
);
3038 oe
->op
= error_mark_node
;
3039 range
->exp
= NULL_TREE
;
3040 range
->low
= NULL_TREE
;
3041 range
->high
= NULL_TREE
;
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
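/* Worked instance of the comment above (constants picked arbitrarily):
   with CST1 == 8 and CST2 == 12, CST1 ^ CST2 == 4 has a single bit set,
   so

     X == 8 || X == 12

   can be rewritten as

     (X & ~4) == (8 & ~4), i.e. (X & ~4) == 8

   which needs one AND and one comparison instead of two comparisons.  */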
3057 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
3058 tree lowi
, tree lowj
, tree highi
, tree highj
,
3059 vec
<operand_entry
*> *ops
,
3060 struct range_entry
*rangei
,
3061 struct range_entry
*rangej
)
3063 tree lowxor
, highxor
, tem
, exp
;
  /* Check lowi ^ lowj == highi ^ highj and
     popcount (lowi ^ lowj) == 1.  */
3066 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
3067 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
3069 if (!integer_pow2p (lowxor
))
3071 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
3072 if (!tree_int_cst_equal (lowxor
, highxor
))
3076 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3077 int prec
= GET_MODE_PRECISION (mode
);
3078 if (TYPE_PRECISION (type
) < prec
3079 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3080 != wi::min_value (prec
, TYPE_SIGN (type
)))
3081 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3082 != wi::max_value (prec
, TYPE_SIGN (type
))))
3084 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
3085 exp
= fold_convert (type
, exp
);
3086 lowxor
= fold_convert (type
, lowxor
);
3087 lowi
= fold_convert (type
, lowi
);
3088 highi
= fold_convert (type
, highi
);
3090 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
3091 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
3092 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
3093 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
3094 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
3095 NULL
, rangei
->in_p
, lowj
, highj
,
3096 rangei
->strict_overflow_p
3097 || rangej
->strict_overflow_p
))
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
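/* Worked instance of the comment above (constants picked arbitrarily):
   with CST1 == 16 and CST2 == 48, CST2 - CST1 == 32 is a power of two,
   so

     X == 16 || X == 48

   can be rewritten as

     ((X - 16U) & ~32U) == 0

   X == 16 gives (0 & ~32U) == 0, X == 48 gives (32U & ~32U) == 0, and any
   other X leaves a bit outside of 32U set in X - 16U.  */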
3113 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
3114 tree lowi
, tree lowj
, tree highi
, tree highj
,
3115 vec
<operand_entry
*> *ops
,
3116 struct range_entry
*rangei
,
3117 struct range_entry
*rangej
)
3119 tree tem1
, tem2
, mask
;
  /* Check highi - lowi == highj - lowj.  */
3121 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
3122 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3124 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
3125 if (!tree_int_cst_equal (tem1
, tem2
))
  /* Check popcount (lowj - lowi) == 1.  */
3128 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
3129 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3131 if (!integer_pow2p (tem1
))
3134 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3135 int prec
= GET_MODE_PRECISION (mode
);
3136 if (TYPE_PRECISION (type
) < prec
3137 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3138 != wi::min_value (prec
, TYPE_SIGN (type
)))
3139 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3140 != wi::max_value (prec
, TYPE_SIGN (type
))))
3141 type
= build_nonstandard_integer_type (prec
, 1);
3143 type
= unsigned_type_for (type
);
3144 tem1
= fold_convert (type
, tem1
);
3145 tem2
= fold_convert (type
, tem2
);
3146 lowi
= fold_convert (type
, lowi
);
3147 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
3148 tem1
= fold_build2 (MINUS_EXPR
, type
,
3149 fold_convert (type
, rangei
->exp
), lowi
);
3150 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
3151 lowj
= build_int_cst (type
, 0);
3152 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
3153 NULL
, rangei
->in_p
, lowj
, tem2
,
3154 rangei
->strict_overflow_p
3155 || rangej
->strict_overflow_p
))
/* This function performs the common checks for optimize_range_tests_xor
   and optimize_range_tests_diff.
   If OPTIMIZE_XOR is true, it calls optimize_range_tests_xor,
   otherwise it calls optimize_range_tests_diff.  */
3166 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
3167 bool optimize_xor
, vec
<operand_entry
*> *ops
,
3168 struct range_entry
*ranges
)
3171 bool any_changes
= false;
3172 for (i
= first
; i
< length
; i
++)
3174 tree lowi
, highi
, lowj
, highj
, type
, tem
;
3176 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3178 type
= TREE_TYPE (ranges
[i
].exp
);
3179 if (!INTEGRAL_TYPE_P (type
))
3181 lowi
= ranges
[i
].low
;
3182 if (lowi
== NULL_TREE
)
3183 lowi
= TYPE_MIN_VALUE (type
);
3184 highi
= ranges
[i
].high
;
3185 if (highi
== NULL_TREE
)
3187 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
3190 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
3192 lowj
= ranges
[j
].low
;
3193 if (lowj
== NULL_TREE
)
3195 highj
= ranges
[j
].high
;
3196 if (highj
== NULL_TREE
)
3197 highj
= TYPE_MAX_VALUE (type
);
      /* Check lowj > highi.  */
3199 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
3201 if (tem
== NULL_TREE
|| !integer_onep (tem
))
3204 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
3206 ranges
+ i
, ranges
+ j
);
3208 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
3210 ranges
+ i
, ranges
+ j
);
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute the bit mask of bits to test and return
   EXP on success, NULL otherwise.  */
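/* Worked instance (values invented for illustration): for a single range
   [LOW, HIGH] = [43, 45] with TOTALLOW == 43 and prec == 64, the range
   covers HIGH - LOW + 1 == 3 values, so EXP is returned with *MASK having
   bits 0, 1 and 2 set.  The caller can then test membership with something
   along the lines of

     ((1 << (X - 43)) & 0x7) != 0

   instead of comparing X against 43, 44 and 45 separately.  */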
3226 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
3227 wide_int
*mask
, tree
*totallowp
)
3229 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
3230 if (tem
== NULL_TREE
3231 || TREE_CODE (tem
) != INTEGER_CST
3232 || TREE_OVERFLOW (tem
)
3233 || tree_int_cst_sgn (tem
) == -1
3234 || compare_tree_int (tem
, prec
) != -1)
3237 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
3238 *mask
= wi::shifted_mask (0, max
, false, prec
);
3239 if (TREE_CODE (exp
) == BIT_AND_EXPR
3240 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3242 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
3243 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
3244 if (wi::popcount (msk
) == 1
3245 && wi::ltu_p (msk
, prec
- max
))
3247 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
3248 max
+= msk
.to_uhwi ();
3249 exp
= TREE_OPERAND (exp
, 0);
3250 if (integer_zerop (low
)
3251 && TREE_CODE (exp
) == PLUS_EXPR
3252 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3254 tree ret
= TREE_OPERAND (exp
, 0);
3257 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
3258 TYPE_PRECISION (TREE_TYPE (low
))));
3259 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
3265 else if (!tree_int_cst_lt (totallow
, tbias
))
3267 bias
= wi::to_widest (tbias
);
3268 bias
-= wi::to_widest (totallow
);
3269 if (bias
>= 0 && bias
< prec
- max
)
3271 *mask
= wi::lshift (*mask
, bias
);
3279 if (!tree_int_cst_lt (totallow
, low
))
3281 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3282 if (tem
== NULL_TREE
3283 || TREE_CODE (tem
) != INTEGER_CST
3284 || TREE_OVERFLOW (tem
)
3285 || compare_tree_int (tem
, prec
- max
) == 1)
3288 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by earlier optimizations into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole 43 through 82 range spans fewer than 64 values,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
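/* To illustrate how the mask in the comment above is formed (derived from
   the constants already shown there): subtracting 43 turns the excluded
   values 43, 44, 45, 46, 49, 75, 76, 77, 78 and 82 into the relative bit
   positions 0, 1, 2, 3, 6, 32, 33, 34, 35 and 39, and setting exactly
   those bits yields the 64-bit constant 0x8F0000004FULL tested above.  */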
3303 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3304 vec
<operand_entry
*> *ops
,
3305 struct range_entry
*ranges
)
3308 bool any_changes
= false;
3309 int prec
= GET_MODE_BITSIZE (word_mode
);
3310 auto_vec
<struct range_entry
*, 64> candidates
;
3312 for (i
= first
; i
< length
- 1; i
++)
3314 tree lowi
, highi
, lowj
, highj
, type
;
3316 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3318 type
= TREE_TYPE (ranges
[i
].exp
);
3319 if (!INTEGRAL_TYPE_P (type
))
3321 lowi
= ranges
[i
].low
;
3322 if (lowi
== NULL_TREE
)
3323 lowi
= TYPE_MIN_VALUE (type
);
3324 highi
= ranges
[i
].high
;
3325 if (highi
== NULL_TREE
)
3328 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3329 highi
, &mask
, &lowi
);
3330 if (exp
== NULL_TREE
)
3332 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3333 candidates
.truncate (0);
3334 int end
= MIN (i
+ 64, length
);
3335 for (j
= i
+ 1; j
< end
; j
++)
3338 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3340 if (ranges
[j
].exp
== exp
)
3342 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3344 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3347 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3349 exp2
= TREE_OPERAND (exp2
, 0);
3359 lowj
= ranges
[j
].low
;
3360 if (lowj
== NULL_TREE
)
3362 highj
= ranges
[j
].high
;
3363 if (highj
== NULL_TREE
)
3364 highj
= TYPE_MAX_VALUE (type
);
3366 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3367 highj
, &mask2
, NULL
);
3371 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3372 candidates
.safe_push (&ranges
[j
]);
      /* If every possible relative value of the expression is a valid shift
	 amount, then we can merge the entry test into the bit test.  In this
	 case, if we would otherwise need 2 or more comparisons, then use
	 the bit test; in the other cases, the threshold is 3 comparisons.  */
3379 bool entry_test_needed
;
3381 if (TREE_CODE (exp
) == SSA_NAME
3382 && get_range_query (cfun
)->range_of_expr (r
, exp
)
3383 && !r
.undefined_p ()
3385 && wi::leu_p (r
.upper_bound () - r
.lower_bound (), prec
- 1))
3387 wide_int min
= r
.lower_bound ();
3388 wide_int ilowi
= wi::to_wide (lowi
);
3389 if (wi::lt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3391 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3392 mask
= wi::lshift (mask
, ilowi
- min
);
3394 else if (wi::gt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3396 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3397 mask
= wi::lrshift (mask
, min
- ilowi
);
3399 entry_test_needed
= false;
3402 entry_test_needed
= true;
3403 if (candidates
.length () >= (entry_test_needed
? 2 : 1))
3405 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3406 wi::to_widest (lowi
)
3407 + prec
- 1 - wi::clz (mask
));
3408 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3410 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3411 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3413 location_t loc
= gimple_location (stmt
);
3414 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
	  /* See if it isn't cheaper to pretend the minimum value of the
	     range is 0, if the maximum value is small enough.
	     We can then avoid the subtraction of the minimum value, but the
	     mask constant could perhaps be more expensive.  */
3420 if (compare_tree_int (lowi
, 0) > 0
3421 && compare_tree_int (high
, prec
) < 0
3422 && (entry_test_needed
|| wi::ltu_p (r
.upper_bound (), prec
)))
3425 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3426 rtx reg
= gen_raw_REG (word_mode
, 10000);
3427 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3428 cost_diff
= set_src_cost (gen_rtx_PLUS (word_mode
, reg
,
3430 word_mode
, speed_p
);
3431 rtx r
= immed_wide_int_const (mask
, word_mode
);
3432 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3433 word_mode
, speed_p
);
3434 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3435 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3436 word_mode
, speed_p
);
3439 mask
= wi::lshift (mask
, m
);
3440 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3445 if (entry_test_needed
)
3447 tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3449 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3454 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3455 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3456 fold_convert_loc (loc
, etype
, exp
),
3457 fold_convert_loc (loc
, etype
, lowi
));
3458 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3459 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3460 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3461 build_int_cst (word_type
, 1), exp
);
3462 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3463 wide_int_to_tree (word_type
, mask
));
3464 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3465 build_zero_cst (word_type
));
3466 if (is_gimple_val (exp
))
	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit code
	     with BIT_IOR_EXPR instead of &&, and fix it up in
	     branch_fixup.  */
3474 gimple_seq seq
= NULL
;
3477 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3478 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3479 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3482 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3483 gimple_seq_add_seq_without_update (&seq
, seq2
);
3484 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3485 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3488 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3489 BIT_IOR_EXPR
, tem
, exp
);
3490 gimple_set_location (g
, loc
);
3491 gimple_seq_add_stmt_without_update (&seq
, g
);
3492 exp
= gimple_assign_lhs (g
);
3494 tree val
= build_zero_cst (optype
);
3495 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3496 candidates
.length (), opcode
, ops
, exp
,
3497 seq
, false, val
, val
, strict_overflow_p
))
3501 reassoc_branch_fixups
.safe_push (tem
);
3504 gimple_seq_discard (seq
);
/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
   and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
   Also, handle x < C && y < C && z < C where C is a power of two as
   (x | y | z) < C.  And also handle signed x < 0 && y < 0 && z < 0
   as (x | y | z) < 0.  */
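/* Worked instance of the power-of-two form above (unsigned operands,
   C == 16 chosen only for illustration): each operand satisfies
   x <= (x | y | z), and x < 16, y < 16, z < 16 just means no bit above
   bit 3 is set in any of them, so

     x < 16 && y < 16 && z < 16

   is equivalent to

     (x | y | z) < 16

   trading three comparisons for two ORs and one comparison.  */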
3517 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3518 vec
<operand_entry
*> *ops
,
3519 struct range_entry
*ranges
)
3523 bool any_changes
= false;
3524 auto_vec
<int, 128> buckets
;
3525 auto_vec
<int, 32> chains
;
3526 auto_vec
<struct range_entry
*, 32> candidates
;
3528 for (i
= first
; i
< length
; i
++)
3532 if (ranges
[i
].exp
== NULL_TREE
3533 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3534 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3535 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
)
3538 if (ranges
[i
].low
!= NULL_TREE
3539 && ranges
[i
].high
!= NULL_TREE
3541 && tree_int_cst_equal (ranges
[i
].low
, ranges
[i
].high
))
3543 idx
= !integer_zerop (ranges
[i
].low
);
3544 if (idx
&& !integer_all_onesp (ranges
[i
].low
))
3547 else if (ranges
[i
].high
!= NULL_TREE
3548 && TREE_CODE (ranges
[i
].high
) == INTEGER_CST
3551 wide_int w
= wi::to_wide (ranges
[i
].high
);
3552 int prec
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
));
3553 int l
= wi::clz (w
);
3557 || w
!= wi::mask (prec
- l
, false, prec
))
3559 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3560 && ranges
[i
].low
== NULL_TREE
)
3562 && integer_zerop (ranges
[i
].low
))))
3565 else if (ranges
[i
].high
== NULL_TREE
3566 && ranges
[i
].low
!= NULL_TREE
	       /* Perform this optimization only in the last
		  reassoc pass, as it interferes with the reassociation
		  itself and could also interfere with VRP etc., which
		  might not be able to virtually undo the optimization.  */
3571 && !reassoc_insert_powi_p
3572 && !TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3573 && integer_zerop (ranges
[i
].low
))
3578 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 4 + idx
;
3579 if (buckets
.length () <= b
)
3580 buckets
.safe_grow_cleared (b
+ 1, true);
3581 if (chains
.length () <= (unsigned) i
)
3582 chains
.safe_grow (i
+ 1, true);
3583 chains
[i
] = buckets
[b
];
3587 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3588 if (i
&& chains
[i
- 1])
	/* When ranges[X - 1].high + 1 is a power of two,
	   we need to process the same bucket up to
	   precision - 1 times, each time splitting the entries
	   with the same high bound into one chain and the
	   rest into another one to be processed later.  */
3600 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3602 if (tree_int_cst_equal (ranges
[i
- 1].high
,
3603 ranges
[j
- 1].high
))
3605 chains
[this_prev
- 1] = j
;
3608 else if (other_prev
== 0)
3615 chains
[other_prev
- 1] = j
;
3619 chains
[this_prev
- 1] = 0;
3621 chains
[other_prev
- 1] = 0;
3622 if (chains
[i
- 1] == 0)
3629 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3631 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3632 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3633 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3636 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3637 tree type2
= NULL_TREE
;
3638 bool strict_overflow_p
= false;
3639 candidates
.truncate (0);
3640 if (POINTER_TYPE_P (type1
) || TREE_CODE (type1
) == OFFSET_TYPE
)
3641 type1
= pointer_sized_int_node
;
3642 for (j
= i
; j
; j
= chains
[j
- 1])
3644 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3645 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3646 if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == OFFSET_TYPE
)
3647 type
= pointer_sized_int_node
;
	    /* For the signed < 0 cases, the types should be
	       really compatible (all signed with the same precision);
	       instead put ranges that have different in_p from
	       k first.  */
3654 if (!useless_type_conversion_p (type1
, type
))
3656 if (ranges
[j
- 1].in_p
!= ranges
[k
- 1].in_p
)
3657 candidates
.safe_push (&ranges
[j
- 1]);
3662 || useless_type_conversion_p (type1
, type
))
3664 else if (type2
== NULL_TREE
3665 || useless_type_conversion_p (type2
, type
))
3667 if (type2
== NULL_TREE
)
3669 candidates
.safe_push (&ranges
[j
- 1]);
3672 unsigned l
= candidates
.length ();
3673 for (j
= i
; j
; j
= chains
[j
- 1])
3675 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3678 if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == OFFSET_TYPE
)
3679 type
= pointer_sized_int_node
;
3682 if (!useless_type_conversion_p (type1
, type
))
3684 if (ranges
[j
- 1].in_p
== ranges
[k
- 1].in_p
)
3685 candidates
.safe_push (&ranges
[j
- 1]);
3688 if (useless_type_conversion_p (type1
, type
))
3690 else if (type2
== NULL_TREE
3691 || useless_type_conversion_p (type2
, type
))
3693 candidates
.safe_push (&ranges
[j
- 1]);
3695 gimple_seq seq
= NULL
;
3696 tree op
= NULL_TREE
;
3698 struct range_entry
*r
;
3699 candidates
.safe_push (&ranges
[k
- 1]);
3700 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3703 enum tree_code code
;
3710 || POINTER_TYPE_P (TREE_TYPE (op
))
3711 || TREE_CODE (TREE_TYPE (op
)) == OFFSET_TYPE
)
3713 code
= (b
% 4) == 3 ? BIT_NOT_EXPR
: NOP_EXPR
;
3714 tree type3
= id
>= l
? type1
: pointer_sized_int_node
;
3715 if (code
== BIT_NOT_EXPR
3716 && TREE_CODE (TREE_TYPE (op
)) == OFFSET_TYPE
)
3718 g
= gimple_build_assign (make_ssa_name (type3
),
3720 gimple_seq_add_stmt_without_update (&seq
, g
);
3721 op
= gimple_assign_lhs (g
);
3723 g
= gimple_build_assign (make_ssa_name (type3
), code
, op
);
3724 gimple_seq_add_stmt_without_update (&seq
, g
);
3725 op
= gimple_assign_lhs (g
);
3727 tree type
= TREE_TYPE (r
->exp
);
3729 if (POINTER_TYPE_P (type
)
3730 || TREE_CODE (type
) == OFFSET_TYPE
3731 || (id
>= l
&& !useless_type_conversion_p (type1
, type
)))
3733 tree type3
= id
>= l
? type1
: pointer_sized_int_node
;
3734 g
= gimple_build_assign (make_ssa_name (type3
), NOP_EXPR
, exp
);
3735 gimple_seq_add_stmt_without_update (&seq
, g
);
3736 exp
= gimple_assign_lhs (g
);
3739 code
= r
->in_p
? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3741 code
= (b
% 4) == 1 ? BIT_AND_EXPR
: BIT_IOR_EXPR
;
3742 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3744 gimple_seq_add_stmt_without_update (&seq
, g
);
3745 op
= gimple_assign_lhs (g
);
3747 type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3748 if (POINTER_TYPE_P (type1
) || TREE_CODE (type1
) == OFFSET_TYPE
)
3751 = gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, op
);
3752 gimple_seq_add_stmt_without_update (&seq
, g
);
3753 op
= gimple_assign_lhs (g
);
3756 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3757 candidates
.length (), opcode
, ops
, op
,
3758 seq
, ranges
[k
- 1].in_p
, ranges
[k
- 1].low
,
3759 ranges
[k
- 1].high
, strict_overflow_p
))
3762 gimple_seq_discard (seq
);
3763 if ((b
% 4) == 2 && buckets
[b
] != i
)
	/* There is more work to do for this bucket.  */
/* Attempt to optimize for signed a and b where b is known to be >= 0:
   a >= 0 && a < b into (unsigned) a < (unsigned) b
   a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
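/* Worked instance (made-up values): with signed char a and b, and b known
   to be >= 0, the test

     a >= 0 && a < b

   holds exactly when (unsigned char) a < (unsigned char) b: nonnegative a
   compares the same way after the cast, while negative a maps to
   128 .. 255, which is never below b's range of 0 .. 127.  */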
3776 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3777 vec
<operand_entry
*> *ops
,
3778 struct range_entry
*ranges
,
3779 basic_block first_bb
)
3782 bool any_changes
= false;
3783 hash_map
<tree
, int> *map
= NULL
;
3785 for (i
= first
; i
< length
; i
++)
3787 if (ranges
[i
].exp
== NULL_TREE
3788 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3792 tree type
= TREE_TYPE (ranges
[i
].exp
);
3793 if (!INTEGRAL_TYPE_P (type
)
3794 || TYPE_UNSIGNED (type
)
3795 || ranges
[i
].low
== NULL_TREE
3796 || !integer_zerop (ranges
[i
].low
)
3797 || ranges
[i
].high
!= NULL_TREE
)
      /* EXP >= 0 here.  */
3801 map
= new hash_map
<tree
, int>;
3802 map
->put (ranges
[i
].exp
, i
);
3808 for (i
= 0; i
< length
; i
++)
3810 bool in_p
= ranges
[i
].in_p
;
3811 if (ranges
[i
].low
== NULL_TREE
3812 || ranges
[i
].high
== NULL_TREE
)
3814 if (!integer_zerop (ranges
[i
].low
)
3815 || !integer_zerop (ranges
[i
].high
))
3818 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3819 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3820 && integer_onep (ranges
[i
].low
)
3821 && integer_onep (ranges
[i
].high
))
3832 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3834 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3835 if (!is_gimple_assign (stmt
))
3837 ccode
= gimple_assign_rhs_code (stmt
);
3838 rhs1
= gimple_assign_rhs1 (stmt
);
3839 rhs2
= gimple_assign_rhs2 (stmt
);
3843 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3844 stmt
= last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3845 if (gimple_code (stmt
) != GIMPLE_COND
)
3847 ccode
= gimple_cond_code (stmt
);
3848 rhs1
= gimple_cond_lhs (stmt
);
3849 rhs2
= gimple_cond_rhs (stmt
);
3852 if (TREE_CODE (rhs1
) != SSA_NAME
3853 || rhs2
== NULL_TREE
3854 || TREE_CODE (rhs2
) != SSA_NAME
)
3868 ccode
= invert_tree_comparison (ccode
, false);
3873 std::swap (rhs1
, rhs2
);
3874 ccode
= swap_tree_comparison (ccode
);
3883 int *idx
= map
->get (rhs1
);
      /* maybe_optimize_range_tests allows statements without side-effects
	 in the basic blocks as long as they are consumed in the same bb.
	 Make sure rhs2's def stmt is not among them, otherwise we can't
	 safely use get_nonzero_bits on it.  E.g. in:
	    # RANGE [-83, 1] NONZERO 173
	    # k_32 = PHI <k_47(13), k_12(9)>
	    if (k_32 >= 0)
	      goto <bb 5>; [26.46%]
	    else
	      goto <bb 9>; [73.54%]

	   <bb 5> [local count: 140323371]:
	    # RANGE [0, 1] NONZERO 1
	    # RANGE [0, 4] NONZERO 4
	    # RANGE [0, 4] NONZERO 4
	    iftmp.0_44 = (char) _21;
	    if (k_32 < iftmp.0_44)
	      goto <bb 6>; [84.48%]
	    else
	      goto <bb 9>; [15.52%]
	 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
	 k_32 >= 0.  If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
	 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
	 those stmts even for negative k_32 and the value ranges would be no
	 longer guaranteed and so the optimization would be invalid.  */
3915 while (opcode
== ERROR_MARK
)
3917 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3918 basic_block bb2
= gimple_bb (g
);
3921 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
	  /* As an exception, handle a few common cases.  */
3924 if (gimple_assign_cast_p (g
)
3925 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3927 tree op0
= gimple_assign_rhs1 (g
);
3928 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3929 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3930 > TYPE_PRECISION (TREE_TYPE (op0
))))
		/* Zero-extension is always ok.  */
3933 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3934 == TYPE_PRECISION (TREE_TYPE (op0
))
3935 && TREE_CODE (op0
) == SSA_NAME
)
		  /* Cast from signed to unsigned or vice versa.  Retry
		     with the op0 as new rhs2.  */
3943 else if (is_gimple_assign (g
)
3944 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3945 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3946 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
	    /* Masking with INTEGER_CST with MSB clear is always ok
	       too.  */
3954 if (rhs2
== NULL_TREE
)
3957 wide_int nz
= get_nonzero_bits (rhs2
);
      /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
	 and RHS2 is known to be >= 0.  */
3963 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3965 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3966 if ((ranges
[*idx
].strict_overflow_p
3967 || ranges
[i
].strict_overflow_p
)
3968 && issue_strict_overflow_warning (wc
))
3969 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3970 "assuming signed overflow does not occur "
3971 "when simplifying range test");
3973 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3975 struct range_entry
*r
= &ranges
[*idx
];
3976 fprintf (dump_file
, "Optimizing range test ");
3977 print_generic_expr (dump_file
, r
->exp
);
3978 fprintf (dump_file
, " +[");
3979 print_generic_expr (dump_file
, r
->low
);
3980 fprintf (dump_file
, ", ");
3981 print_generic_expr (dump_file
, r
->high
);
3982 fprintf (dump_file
, "] and comparison ");
3983 print_generic_expr (dump_file
, rhs1
);
3984 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3985 print_generic_expr (dump_file
, rhs2
);
3986 fprintf (dump_file
, "\n into (");
3987 print_generic_expr (dump_file
, utype
);
3988 fprintf (dump_file
, ") ");
3989 print_generic_expr (dump_file
, rhs1
);
3990 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3991 print_generic_expr (dump_file
, utype
);
3992 fprintf (dump_file
, ") ");
3993 print_generic_expr (dump_file
, rhs2
);
3994 fprintf (dump_file
, "\n");
3997 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3999 if (opcode
== BIT_IOR_EXPR
4000 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
4003 ccode
= invert_tree_comparison (ccode
, false);
4006 unsigned int uid
= gimple_uid (stmt
);
4007 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4008 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
4009 gimple_set_uid (g
, uid
);
4010 rhs1
= gimple_assign_lhs (g
);
4011 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4012 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
4014 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
4015 gimple_set_uid (g
, uid
);
4016 rhs2
= gimple_assign_lhs (g
);
4017 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4019 if (tree_swap_operands_p (rhs1
, rhs2
))
4021 std::swap (rhs1
, rhs2
);
4022 ccode
= swap_tree_comparison (ccode
);
4024 if (gimple_code (stmt
) == GIMPLE_COND
)
4026 gcond
*c
= as_a
<gcond
*> (stmt
);
4027 gimple_cond_set_code (c
, ccode
);
4028 gimple_cond_set_lhs (c
, rhs1
);
4029 gimple_cond_set_rhs (c
, rhs2
);
4034 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
4035 if (!INTEGRAL_TYPE_P (ctype
)
4036 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
4037 && TYPE_PRECISION (ctype
) != 1))
4038 ctype
= boolean_type_node
;
4039 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
4040 gimple_set_uid (g
, uid
);
4041 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4042 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
4044 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
4045 NOP_EXPR
, gimple_assign_lhs (g
));
4046 gimple_set_uid (g
, uid
);
4047 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4049 ranges
[i
].exp
= gimple_assign_lhs (g
);
4050 oe
->op
= ranges
[i
].exp
;
4051 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
4052 ranges
[i
].high
= ranges
[i
].low
;
4054 ranges
[i
].strict_overflow_p
= false;
4055 oe
= (*ops
)[ranges
[*idx
].idx
];
      /* Now change all the other range test immediate uses, so that
	 those tests will be optimized away.  */
4058 if (opcode
== ERROR_MARK
)
4061 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
4062 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
4064 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
4065 ? boolean_false_node
: boolean_true_node
);
4068 oe
->op
= error_mark_node
;
4069 ranges
[*idx
].exp
= NULL_TREE
;
4070 ranges
[*idx
].low
= NULL_TREE
;
4071 ranges
[*idx
].high
= NULL_TREE
;
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.
   FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
4090 optimize_range_tests (enum tree_code opcode
,
4091 vec
<operand_entry
*> *ops
, basic_block first_bb
)
4093 unsigned int length
= ops
->length (), i
, j
, first
;
4095 struct range_entry
*ranges
;
4096 bool any_changes
= false;
4101 ranges
= XNEWVEC (struct range_entry
, length
);
4102 for (i
= 0; i
< length
; i
++)
4106 init_range_entry (ranges
+ i
, oe
->op
,
4109 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
      /* For | invert it now, we will invert it again before emitting
	 the optimized expression.  */
4112 if (opcode
== BIT_IOR_EXPR
4113 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
4114 ranges
[i
].in_p
= !ranges
[i
].in_p
;
4117 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
4118 for (i
= 0; i
< length
; i
++)
4119 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
  /* Try to merge ranges.  */
4123 for (first
= i
; i
< length
; i
++)
4125 tree low
= ranges
[i
].low
;
4126 tree high
= ranges
[i
].high
;
4127 int in_p
= ranges
[i
].in_p
;
4128 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
4129 int update_fail_count
= 0;
4131 for (j
= i
+ 1; j
< length
; j
++)
4133 if (ranges
[i
].exp
!= ranges
[j
].exp
)
4135 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
4136 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
4138 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
4144 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
4145 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
4146 low
, high
, strict_overflow_p
))
	  /* Avoid quadratic complexity if all merge_ranges calls would
	     succeed, while update_range_test would fail.  */
4153 else if (update_fail_count
== 64)
4156 ++update_fail_count
;
4159 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
4162 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
4163 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
4165 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
4166 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
4168 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
4170 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
4173 if (any_changes
&& opcode
!= ERROR_MARK
)
4176 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4178 if (oe
->op
== error_mark_node
)
4187 XDELETEVEC (ranges
);
/* A subroutine of optimize_vec_cond_expr to extract and canonicalize
   the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
   otherwise the comparison code.  TYPE is a return value that is set
   to the type of the comparison.  */
4197 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
, tree
*type
,
4198 tree
*lhs
, tree
*rhs
, gassign
**vcond
)
4200 if (TREE_CODE (var
) != SSA_NAME
)
4203 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
  /* ??? If we start creating more COND_EXPR, we could perform
     this same optimization with them.  For now, simplify.  */
4211 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
4214 tree cond
= gimple_assign_rhs1 (stmt
);
4215 tree_code cmp
= TREE_CODE (cond
);
4216 if (cmp
!= SSA_NAME
)
4219 gassign
*assign
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (cond
));
4221 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign
)) != tcc_comparison
)
4224 cmp
= gimple_assign_rhs_code (assign
);
4226 *lhs
= gimple_assign_rhs1 (assign
);
4228 *rhs
= gimple_assign_rhs2 (assign
);
  /* ??? For now, allow only canonical true and false result vectors.
     We could expand this to other constants should the need arise,
     but at the moment we don't create them.  */
4233 tree t
= gimple_assign_rhs2 (stmt
);
4234 tree f
= gimple_assign_rhs3 (stmt
);
4236 if (integer_all_onesp (t
))
4238 else if (integer_all_onesp (f
))
4240 cmp
= invert_tree_comparison (cmp
, false);
4245 if (!integer_zerop (f
))
4254 *type
= TREE_TYPE (cond
);
/* Optimize the condition of VEC_COND_EXPRs which have been combined
   with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR).  */
4262 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
4264 unsigned int length
= ops
->length (), i
, j
;
4265 bool any_changes
= false;
4270 for (i
= 0; i
< length
; ++i
)
4272 tree elt0
= (*ops
)[i
]->op
;
4274 gassign
*stmt0
, *vcond0
;
4276 tree type
, lhs0
, rhs0
;
4277 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
, &type
, &lhs0
,
4279 if (cmp0
== ERROR_MARK
)
4282 for (j
= i
+ 1; j
< length
; ++j
)
4284 tree
&elt1
= (*ops
)[j
]->op
;
4286 gassign
*stmt1
, *vcond1
;
4288 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
, NULL
, &lhs1
,
4290 if (cmp1
== ERROR_MARK
)
4294 if (opcode
== BIT_AND_EXPR
)
4295 comb
= maybe_fold_and_comparisons (type
, cmp0
, lhs0
, rhs0
,
4297 else if (opcode
== BIT_IOR_EXPR
)
4298 comb
= maybe_fold_or_comparisons (type
, cmp0
, lhs0
, rhs0
,
4306 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4308 fprintf (dump_file
, "Transforming ");
4309 print_generic_expr (dump_file
, gimple_assign_lhs (stmt0
));
4310 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
4311 print_generic_expr (dump_file
, gimple_assign_lhs (stmt1
));
4312 fprintf (dump_file
, " into ");
4313 print_generic_expr (dump_file
, comb
);
4314 fputc ('\n', dump_file
);
4317 gimple_stmt_iterator gsi
= gsi_for_stmt (vcond0
);
4318 tree exp
= force_gimple_operand_gsi (&gsi
, comb
, true, NULL_TREE
,
4319 true, GSI_SAME_STMT
);
4321 swap_ssa_operands (vcond0
, gimple_assign_rhs2_ptr (vcond0
),
4322 gimple_assign_rhs3_ptr (vcond0
));
4323 gimple_assign_set_rhs1 (vcond0
, exp
);
4324 update_stmt (vcond0
);
4326 elt1
= error_mark_node
;
4335 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4337 if (oe
->op
== error_mark_node
)
/* Return true if STMT is a cast like:
   # _345 = PHI <_123(N), 1(...), 1(...)>
   where _234 has bool type, _123 has a single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.

   Also return true if STMT is tcc_compare like:
   # _345 = PHI <_234(N), 1(...), 1(...)>
   where _234 has bool type, a single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
4373 final_range_test_p (gimple
*stmt
)
4375 basic_block bb
, rhs_bb
, lhs_bb
;
4378 use_operand_p use_p
;
4381 if (!gimple_assign_cast_p (stmt
)
4382 && (!is_gimple_assign (stmt
)
4383 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4384 != tcc_comparison
)))
4386 bb
= gimple_bb (stmt
);
4387 if (!single_succ_p (bb
))
4389 e
= single_succ_edge (bb
);
4390 if (e
->flags
& EDGE_COMPLEX
)
4393 lhs
= gimple_assign_lhs (stmt
);
4394 rhs
= gimple_assign_rhs1 (stmt
);
4395 if (gimple_assign_cast_p (stmt
)
4396 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4397 || TREE_CODE (rhs
) != SSA_NAME
4398 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
4401 if (!gimple_assign_cast_p (stmt
)
4402 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
4405 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4406 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
4409 if (gimple_code (use_stmt
) != GIMPLE_PHI
4410 || gimple_bb (use_stmt
) != e
->dest
)
4413 /* And that the rhs is defined in the same loop. */
4414 if (gimple_assign_cast_p (stmt
))
4416 if (TREE_CODE (rhs
) != SSA_NAME
4417 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
4418 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
4423 if (TREE_CODE (lhs
) != SSA_NAME
4424 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
4425 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
   or compared with to find a common basic block to which all conditions
   branch if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  *TEST_SWAPPED_P is set to true if
   TEST_BB is a bb ending in a condition where the edge to the
   non-*OTHER_BB block points to an empty block that falls through into
   *OTHER_BB and the phi args match that path.  */
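
/* A sketch of the BACKWARD = true shape this matches (illustrative
   only, block numbers are made up):

     <bb 2>: if (a_1 < 10) goto <bb 3>; else goto <bb 5>;   BB
     <bb 3>: if (b_2 != 0) goto <bb 4>; else goto <bb 5>;   TEST_BB
     <bb 5>: PHI with matching arguments from bb 2 and bb 3  *OTHER_BB

   Both conditions branch to the common *OTHER_BB on one edge and
   continue down the chain on the other.  */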
suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
		  bool *test_swapped_p, bool backward)
  edge_iterator ei, ei2;
  bool other_edge_seen = false;

  /* Check last stmt first.  */
  stmt = last_nondebug_stmt (bb);
      || (gimple_code (stmt) != GIMPLE_COND
	  && (backward || !final_range_test_p (stmt)))
      || gimple_visited_p (stmt)
      || stmt_could_throw_p (cfun, stmt)

  is_cond = gimple_code (stmt) == GIMPLE_COND;

      /* If last stmt is GIMPLE_COND, verify that one of the succ edges
	 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
	 to *OTHER_BB (if not set yet, try to find it out).  */
      if (EDGE_COUNT (bb->succs) != 2)
      FOR_EACH_EDGE (e, ei, bb->succs)
	  if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
	  if (e->dest == test_bb)
	  if (*other_bb == NULL)
	      FOR_EACH_EDGE (e2, ei2, test_bb->succs)
		if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
		else if (e->dest == e2->dest)
		  *other_bb = e->dest;
	      if (*other_bb == NULL)
	  if (e->dest == *other_bb)
	    other_edge_seen = true;
      if (*other_bb == NULL || !other_edge_seen)
  else if (single_succ (bb) != *other_bb)

  /* Now check all PHIs of *OTHER_BB.  */
  e = find_edge (bb, *other_bb);
  e2 = find_edge (test_bb, *other_bb);
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
      gphi *phi = gsi.phi ();
      /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
	 corresponding to BB and TEST_BB predecessor must be the same.  */
      if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
			    gimple_phi_arg_def (phi, e2->dest_idx), 0))
	  /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	     one of the PHIs should have the lhs of the last stmt in
	     that block as PHI arg and that PHI should have 0 or 1
	     corresponding to it in all other range test basic blocks
	     considered.  */
	      if (gimple_phi_arg_def (phi, e->dest_idx)
		  == gimple_assign_lhs (stmt)
		  && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
		      || integer_onep (gimple_phi_arg_def (phi,

	  gimple *test_last = last_nondebug_stmt (test_bb);
	  if (gimple_code (test_last) == GIMPLE_COND)
	      if (backward ? e2->src != test_bb : e->src != bb)
	      /* For last_bb, handle also:
		   goto <bb 6>; [34.00%]
		   goto <bb 7>; [66.00%]

		   <bb 6> [local count: 79512730]:

		   <bb 7> [local count: 1073741824]:
		   # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
		 where bb 7 is *OTHER_BB, but the PHI values from the
		 earlier bbs match the path through the empty bb
		 in between.  */
	      e3 = EDGE_SUCC (test_bb,
			      e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
			      e == EDGE_SUCC (bb, 0) ? 1 : 0);
	      if (empty_block_p (e3->dest)
		  && single_succ_p (e3->dest)
		  && single_succ (e3->dest) == *other_bb
		  && single_pred_p (e3->dest)
		  && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
		  e2 = single_succ_edge (e3->dest);
		  e = single_succ_edge (e3->dest);
		  *test_swapped_p = true;
	  else if (gimple_phi_arg_def (phi, e2->dest_idx)
		   == gimple_assign_lhs (test_last)
		   && (integer_zerop (gimple_phi_arg_def (phi,
		       || integer_onep (gimple_phi_arg_def (phi,
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */

no_side_effect_bb (basic_block bb)
  gimple_stmt_iterator gsi;

  if (!gimple_seq_empty_p (phi_nodes (bb)))
  last = last_nondebug_stmt (bb);
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple *stmt = gsi_stmt (gsi);
      imm_use_iterator imm_iter;
      use_operand_p use_p;

      if (is_gimple_debug (stmt))
      if (gimple_has_side_effects (stmt))
      if (!is_gimple_assign (stmt))
      lhs = gimple_assign_lhs (stmt);
      if (TREE_CODE (lhs) != SSA_NAME)
      if (gimple_assign_rhs_could_trap_p (stmt))
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	  if (gimple_bb (use_stmt) != bb)
/* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
   return true and fill in *OPS recursively.  */

get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
  gimple *stmt = SSA_NAME_DEF_STMT (var);

  if (!is_reassociable_op (stmt, code, loop))

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  gimple_set_visited (stmt, true);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME
	&& !get_ops (rhs[i], code, ops, loop)
	&& has_single_use (rhs[i]))
	operand_entry *oe = operand_entry_pool.allocate ();

	oe->stmt_to_insert = NULL;
	ops->safe_push (oe);
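
/* For illustration (made-up SSA names): with
     _5 = _1 | _2;
     _6 = _5 | _3;
   calling get_ops (_6, BIT_IOR_EXPR, &ops, loop) is expected to push
   operand entries for _1, _2 and _3 and mark the two defining
   statements visited, provided each intermediate result has a single
   use.  */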
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */

update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
	    unsigned int *pidx, class loop *loop)
  gimple *stmt = SSA_NAME_DEF_STMT (var);

  if (!is_reassociable_op (stmt, code, loop))

  rhs[0] = gimple_assign_rhs1 (stmt);
  rhs[1] = gimple_assign_rhs2 (stmt);
  for (i = 0; i < 2; i++)
    if (TREE_CODE (rhs[i]) == SSA_NAME)
	rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
	if (rhs[2 + i] == NULL_TREE)
	    if (has_single_use (rhs[i]))
	      rhs[2 + i] = ops[(*pidx)++]->op;
	      rhs[2 + i] = rhs[i];

  if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
      && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      var = make_ssa_name (TREE_TYPE (var));
      gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
      gimple_set_uid (g, gimple_uid (stmt));
      gimple_set_visited (g, true);
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
      gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
      if (fold_stmt_inplace (&gsi2))
/* Structure to track the initial value passed to get_ops and
   the range in the ops vector for each basic block.  */

struct inter_bb_range_test_entry
  unsigned int first_idx, last_idx;
/* Inter-bb range test optimization.

   Returns TRUE if a gimple conditional is optimized to a true/false,
   otherwise return FALSE.

   This indicates to the caller that it should run a CFG cleanup pass
   once reassociation is completed.  */
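
/* A sketch of the shape this optimizes (illustrative only):

     <bb 2>: if (a_1 > 5) goto <bb 3>; else goto <bb 5>;
     <bb 3>: if (a_1 < 10) goto <bb 4>; else goto <bb 5>;
     <bb 4>: _7 = a_1 != 7;
     <bb 5>: # tem_8 = PHI <0(2), 0(3), _7(4)>

   The per-bb tests are gathered into one ops vector, handed to
   optimize_range_tests, and conditions that become redundant are
   replaced by constant true/false GIMPLE_CONDs, which is why the
   caller may need a CFG cleanup afterwards.  */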
maybe_optimize_range_tests (gimple *stmt)
  basic_block first_bb = gimple_bb (stmt);
  basic_block last_bb = first_bb;
  basic_block other_bb = NULL;
  auto_vec<operand_entry *> ops;
  auto_vec<inter_bb_range_test_entry> bbinfo;
  bool any_changes = false;
  bool cfg_cleanup_needed = false;

  /* Consider only basic blocks that end with GIMPLE_COND or
     a cast statement satisfying final_range_test_p.  All
     but the last bb in the first_bb .. last_bb range
     should end with GIMPLE_COND.  */
  if (gimple_code (stmt) == GIMPLE_COND)
      if (EDGE_COUNT (first_bb->succs) != 2)
	return cfg_cleanup_needed;
  else if (final_range_test_p (stmt))
    other_bb = single_succ (first_bb);
    return cfg_cleanup_needed;

  if (stmt_could_throw_p (cfun, stmt))
    return cfg_cleanup_needed;

  /* As relative ordering of post-dominator sons isn't fixed,
     maybe_optimize_range_tests can be called first on any
     bb in the range we want to optimize.  So, start searching
     backwards, if first_bb can be set to a predecessor.  */
  while (single_pred_p (first_bb))
      basic_block pred_bb = single_pred (first_bb);
      if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
      if (!no_side_effect_bb (first_bb))

  /* If first_bb is last_bb, other_bb hasn't been computed yet.
     Before starting forward search in last_bb successors, find
     out the other_bb.  */
  if (first_bb == last_bb)
      /* As non-GIMPLE_COND last stmt always terminates the range,
	 if forward search didn't discover anything, just give up.  */
      if (gimple_code (stmt) != GIMPLE_COND)
	return cfg_cleanup_needed;
      /* Look at both successors.  Either it ends with a GIMPLE_COND
	 and satisfies suitable_cond_bb, or ends with a cast and
	 other_bb is that cast's successor.  */
      FOR_EACH_EDGE (e, ei, first_bb->succs)
	if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
	    || e->dest == first_bb)
	  return cfg_cleanup_needed;
	else if (single_pred_p (e->dest))
	    stmt = last_nondebug_stmt (e->dest);
		&& gimple_code (stmt) == GIMPLE_COND
		&& EDGE_COUNT (e->dest->succs) == 2)
		if (suitable_cond_bb (first_bb, e->dest, &other_bb,
		     && final_range_test_p (stmt)
		     && find_edge (first_bb, single_succ (e->dest)))
		other_bb = single_succ (e->dest);
		if (other_bb == first_bb)
      if (other_bb == NULL)
	return cfg_cleanup_needed;

  /* Now do the forward search, moving last_bb to successor bbs
     that aren't other_bb.  */
  while (EDGE_COUNT (last_bb->succs) == 2)
      FOR_EACH_EDGE (e, ei, last_bb->succs)
	if (e->dest != other_bb)
      if (!single_pred_p (e->dest))
      if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
      if (!no_side_effect_bb (e->dest))
  if (first_bb == last_bb)
    return cfg_cleanup_needed;

  /* Here basic blocks first_bb through last_bb's predecessor
     end with GIMPLE_COND, all of them have one of the edges to
     other_bb and another to another block in the range,
     all blocks except first_bb don't have side-effects and
     last_bb ends with either GIMPLE_COND, or cast satisfying
     final_range_test_p.  */
  for (bb = last_bb; ; bb = single_pred (bb))
      enum tree_code code;
      inter_bb_range_test_entry bb_ent;

      bb_ent.op = NULL_TREE;
      bb_ent.first_idx = ops.length ();
      bb_ent.last_idx = bb_ent.first_idx;
      e = find_edge (bb, other_bb);
      stmt = last_nondebug_stmt (bb);
      gimple_set_visited (stmt, true);
      if (gimple_code (stmt) != GIMPLE_COND)
	  use_operand_p use_p;

	  lhs = gimple_assign_lhs (stmt);
	  rhs = gimple_assign_rhs1 (stmt);
	  gcc_assert (bb == last_bb);

	  /* ...
	     # _345 = PHI <_123(N), 1(...), 1(...)>
	     or 0 instead of 1.  If it is 0, the _234
	     range test is anded together with all the
	     other range tests, if it is 1, it is ored with
	     them.  */
	  single_imm_use (lhs, &use_p, &phi);
	  gcc_assert (gimple_code (phi) == GIMPLE_PHI);
	  e2 = find_edge (first_bb, other_bb);
	  gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
	  if (integer_zerop (gimple_phi_arg_def (phi, d)))
	    code = BIT_AND_EXPR;
	      gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
	      code = BIT_IOR_EXPR;

	  /* If _234 SSA_NAME_DEF_STMT is
	     (or &, corresponding to 1/0 in the phi arguments,
	     push into ops the individual range test arguments
	     of the bitwise or resp. and, recursively.  */
	  if (TREE_CODE (rhs) == SSA_NAME
	      && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
	      && !get_ops (rhs, code, &ops,
			   loop_containing_stmt (stmt))
	      && has_single_use (rhs))
	      /* Otherwise, push the _234 range test itself.  */
	      operand_entry *oe = operand_entry_pool.allocate ();

	      oe->stmt_to_insert = NULL;
	  else if (is_gimple_assign (stmt)
		   && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
		   && !get_ops (lhs, code, &ops,
				loop_containing_stmt (stmt))
		   && has_single_use (lhs))
	      operand_entry *oe = operand_entry_pool.allocate ();

	  bb_ent.last_idx = ops.length ();
	  bbinfo.safe_push (bb_ent);
	  for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
	    ops[i]->id = bb->index;
      else if (bb == last_bb)
	  /* For last_bb, handle also:
	       goto <bb 6>; [34.00%]
	       goto <bb 7>; [66.00%]

	       <bb 6> [local count: 79512730]:

	       <bb 7> [local count: 1073741824]:
	       # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
	     where bb 7 is OTHER_BB, but the PHI values from the
	     earlier bbs match the path through the empty bb
	     in between.  */
	  bool test_swapped_p = false;
	  bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
				      &other_bb, &test_swapped_p, true);
	    e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);

      /* Otherwise stmt is GIMPLE_COND.  */
      code = gimple_cond_code (stmt);
      lhs = gimple_cond_lhs (stmt);
      rhs = gimple_cond_rhs (stmt);
      if (TREE_CODE (lhs) == SSA_NAME
	  && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  && ((code != EQ_EXPR && code != NE_EXPR)
	      || rhs != boolean_false_node
	      /* Either push into ops the individual bitwise
		 or resp. and operands, depending on which
		 edge is other_bb.  */
	      || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
				 ^ (code == EQ_EXPR))
				? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
			   loop_containing_stmt (stmt))))
	  /* Or push the GIMPLE_COND stmt itself.  */
	  operand_entry *oe = operand_entry_pool.allocate ();

	  oe->rank = (e->flags & EDGE_TRUE_VALUE)
		     ? BIT_IOR_EXPR : BIT_AND_EXPR;
	  /* oe->op = NULL signs that there is no SSA_NAME
	     for the range test, and oe->id instead is the
	     basic block number, at which's end the GIMPLE_COND
	     is.  */
	  oe->stmt_to_insert = NULL;
      else if (ops.length () > bb_ent.first_idx)
      bb_ent.last_idx = ops.length ();
      bbinfo.safe_push (bb_ent);
      for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
	ops[i]->id = bb->index;

  if (ops.length () > 1)
    any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);

      unsigned int idx, max_idx = 0;
      /* update_ops relies on has_single_use predicates returning the
	 same values as it did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other side, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op != NULL_TREE)
	      stmt = last_nondebug_stmt (bb);
	      new_op = update_ops (bbinfo[idx].op,
				   ops[bbinfo[idx].first_idx]->rank,
				   ops, &bbinfo[idx].first_idx,
				   loop_containing_stmt (stmt));
	      if (new_op == NULL_TREE)
		  gcc_assert (bb == last_bb);
		  new_op = ops[bbinfo[idx].first_idx++]->op;
	      if (bbinfo[idx].op != new_op)
		  imm_use_iterator iter;
		  use_operand_p use_p;
		  gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;

		  FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
		    if (is_gimple_debug (use_stmt))
		    else if (gimple_code (use_stmt) == GIMPLE_COND
			     || gimple_code (use_stmt) == GIMPLE_PHI)
		      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			SET_USE (use_p, new_op);
		    else if ((is_gimple_assign (use_stmt)
			      (gimple_assign_rhs_code (use_stmt))
			      == tcc_comparison)))
		      cast_or_tcc_cmp_stmt = use_stmt;
		    else if (gimple_assign_cast_p (use_stmt))
		      cast_or_tcc_cmp_stmt = use_stmt;

		  if (cast_or_tcc_cmp_stmt)
		      gcc_assert (bb == last_bb);
		      tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
		      tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
		      enum tree_code rhs_code
			= gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
			  ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
		      if (is_gimple_min_invariant (new_op))
			  new_op = fold_convert (TREE_TYPE (lhs), new_op);
			  g = gimple_build_assign (new_lhs, new_op);
			g = gimple_build_assign (new_lhs, rhs_code, new_op);
		      gimple_stmt_iterator gsi
			= gsi_for_stmt (cast_or_tcc_cmp_stmt);
		      gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
		      gimple_set_visited (g, true);
		      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
		      FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
			if (is_gimple_debug (use_stmt))
			else if (gimple_code (use_stmt) == GIMPLE_COND
				 || gimple_code (use_stmt) == GIMPLE_PHI)
			  FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
			    SET_USE (use_p, new_lhs);

      for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
	  if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
	      && bbinfo[idx].op == NULL_TREE
	      && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
	      gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));

	      /* If we collapse the conditional to a true/false
		 condition, then bubble that knowledge up to our caller.  */
	      if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
		  gimple_cond_make_false (cond_stmt);
		  cfg_cleanup_needed = true;
	      else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
		  gimple_cond_make_true (cond_stmt);
		  cfg_cleanup_needed = true;
		  gimple_cond_set_code (cond_stmt, NE_EXPR);
		  gimple_cond_set_lhs (cond_stmt,
				       ops[bbinfo[idx].first_idx]->op);
		  gimple_cond_set_rhs (cond_stmt, boolean_false_node);
	      update_stmt (cond_stmt);

      /* The above changes could result in basic blocks after the first
	 modified one, up to and including last_bb, to be executed even if
	 they would not be in the original program.  If the value ranges of
	 assignment lhs' in those bbs were dependent on the conditions
	 guarding those basic blocks which now can change, the VRs might
	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
	 are only used within the same bb, it should be not a big deal if
	 we just reset all the VRs in those bbs.  See PR68671.  */
      for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
	reset_flow_sensitive_info_in_bb (bb);

  return cfg_cleanup_needed;
/* Remove def stmt of VAR if VAR has zero uses and recurse
   on rhs1 operand if so.  */

remove_visited_stmt_chain (tree var)
  gimple_stmt_iterator gsi;

  if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
  stmt = SSA_NAME_DEF_STMT (var);
  if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
      var = gimple_assign_rhs1 (stmt);
      gsi = gsi_for_stmt (stmt);
      reassoc_remove_stmt (&gsi);
      release_defs (stmt);
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.  */

swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
			  unsigned int opindex)
  operand_entry *oe1, *oe2, *oe3;

  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
    std::swap (*oe1, *oe3);
  else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
    std::swap (*oe1, *oe2);
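
/* For example (hypothetical ranks): with ranks { 2, 2, 5 } at OPINDEX,
   the first and third entries are swapped to give { 5, 2, 2 }, so the
   binary statement consuming the last two operands sees the equal-rank
   pair and is more likely to be simplified or CSEd.  */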
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  Sets INSERT_BEFORE to indicate
   whether RHS1 op RHS2 can be inserted before or needs to be inserted
   after the returned stmt.  */

static inline gimple *
find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
  insert_before = true;
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
      stmt = SSA_NAME_DEF_STMT (rhs1);
      insert_before = false;
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
      stmt = SSA_NAME_DEF_STMT (rhs2);
      insert_before = false;
/* If the stmt that defines operand has to be inserted, insert it
   before the use.  */

insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
  gcc_assert (is_gimple_assign (stmt_to_insert));
  tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
  tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
  gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
  gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
  gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));

  /* If the insert point is not stmt, then insert_point would be
     the point where operand rhs1 or rhs2 is defined.  In this case,
     stmt_to_insert has to be inserted afterwards.  This would
     only happen when the stmt insertion point is flexible.  */
    gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
    insert_stmt_after (stmt_to_insert, insert_point);
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */

rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
		   const vec<operand_entry *> &ops, bool changed,
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == ops.length ())
      operand_entry *oe1, *oe2;

      oe2 = ops[opindex + 1];

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0);

	  /* If the stmt that defines operand has to be inserted, insert it
	     before the use.  */
	  if (oe1->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe1->stmt_to_insert);
	  if (oe2->stmt_to_insert)
	    insert_stmt_before_use (stmt, oe2->stmt_to_insert);
	  /* Even when changed is false, reassociation could have e.g. removed
	     some redundant operations, so unless we are just swapping the
	     arguments or unless there is no change at all (then we just
	     return lhs), force creation of a new SSA_NAME.  */
	  if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
	      gimple *insert_point
		= find_insert_point (stmt, oe1->op, oe2->op, insert_before);
	      lhs = make_ssa_name (TREE_TYPE (lhs));
		= gimple_build_assign (lhs, rhs_code,
	      gimple_set_uid (stmt, uid);
	      gimple_set_visited (stmt, true);
		gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
		insert_stmt_after (stmt, insert_point);
	      gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
	      gimple_assign_set_rhs1 (stmt, oe1->op);
	      gimple_assign_set_rhs2 (stmt, oe2->op);

	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0);

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < ops.length ());

  /* Rewrite the next operator.  */

  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
  if (oe->stmt_to_insert)
    insert_stmt_before_use (stmt, oe->stmt_to_insert);

  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
    = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
			 changed || oe->op != rhs2 || next_changed,

  if (oe->op != rhs2 || new_rhs1 != rhs1)
      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0);

      /* If changed is false, this is either opindex == 0
	 or all outer rhs2's were equal to corresponding oe->op,
	 and powi_result is NULL.
	 That means lhs is equivalent before and after reassociation.
	 Otherwise ensure the old lhs SSA_NAME is not reused and
	 create a new stmt as well, so that any debug stmts will be
	 properly adjusted.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
	  unsigned int uid = gimple_uid (stmt);
	  gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
	  lhs = make_ssa_name (TREE_TYPE (lhs));
	  stmt = gimple_build_assign (lhs, rhs_code,
	  gimple_set_uid (stmt, uid);
	  gimple_set_visited (stmt, true);
	    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	    insert_stmt_after (stmt, insert_point);
	  gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
	  gimple_assign_set_rhs1 (stmt, new_rhs1);
	  gimple_assign_set_rhs2 (stmt, oe->op);

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0);
/* Find out how many cycles we need to compute statements chain.
   OPS_NUM holds number of statements in a chain.  CPU_WIDTH is a
   maximum number of independent statements we may execute per cycle.  */

get_required_cycles (int ops_num, int cpu_width)
  /* While we have more than 2 * cpu_width operands
     we may reduce number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* Remained operands count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
    res += floor_log2 (rest) + 1;
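
/* A worked example (using the formula above): for ops_num = 10 and
   cpu_width = 2, res = 10 / 4 = 2 cycles are spent reducing by
   cpu_width, rest = 10 - 2 * 2 = 6, and since 6 is not a power of two
   we add floor_log2 (6) + 1 = 3, giving 5 cycles in total.  */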
/* Given that the target fully pipelines FMA instructions, return the latency
   of MULT_EXPRs that can't be hidden by the FMAs.  WIDTH is the number of
   pipes.  */

get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
  gcc_checking_assert (mult_num && mult_num <= ops_num);

  /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.

       _2 = .FMA (C, D, _1);

     Otherwise there's latency(MULT)*1 in the first FMA.  */
  return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
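
/* A worked example (using the formula above): with ops_num = 8 and
   width = 4, mult_num = 8 gives CEIL (8, 4) == CEIL (8, 4), i.e. every
   lane starts with a multiply, so two multiply latencies are exposed;
   with mult_num = 3, CEIL (3, 4) = 1 != 2, so only the first FMA's
   multiply latency is exposed and the function returns 1.  */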
/* Returns an optimal number of registers to use for computation of
   given statements.

   LHS is the result ssa name of OPS.  MULT_NUM is number of sub-expressions
   that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs.  */

get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
			 enum tree_code opc, machine_mode mode)
  int param_width = param_tree_reassoc_width;
  int ops_num = ops->length ();

  if (param_width > 0)
    width = param_width;
    width = targetm.sched.reassociation_width (opc, mode);

  /* Get the minimal time required for sequence computation.  */
  cycles_best = get_required_cycles (ops_num, width);

  /* Check if we may use less width and still compute sequence for
     the same time.  It will allow us to reduce registers usage.
     get_required_cycles is monotonically increasing with lower width
     so we can perform a binary search for the minimal width that still
     results in the optimal cycle count.  */

  /* If the target fully pipelines FMA instruction, the multiply part can start
     already if its operands are ready.  Assuming symmetric pipes are used for
     FMUL/FADD/FMA, then for a sequence of FMA like:

	_8 = .FMA (_2, _3, _1);
	_9 = .FMA (_5, _4, _8);
	_10 = .FMA (_7, _6, _9);

     , if width=1, the latency is latency(MULT) + latency(ADD)*3.

	_9 = .FMA (_2, _3, _1);
	_10 = .FMA (_6, _7, _8);

     , it is latency(MULT)*2 + latency(ADD)*2.  Assuming latency(MULT) >=
     latency(ADD), the first variant is preferred.

     Find out if we can get a smaller width considering FMA.
     Assume FMUL and FMA use the same units that can also do FADD.
     For other scenarios, such as when FMUL and FADD are using separated units,
     the following code may not apply.  */

  int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
  if (width > 1 && mult_num && param_fully_pipelined_fma
      && width_mult <= width)
      /* Latency of MULT_EXPRs.  */
	= get_mult_latency_consider_fma (ops_num, mult_num, width_mult);

      /* Quick search might not apply.  So start from 1.  */
      for (int i = 1; i < width_mult; i++)
	    = get_mult_latency_consider_fma (ops_num, mult_num, i);
	  int lat_add_new = get_required_cycles (ops_num, i);

	  /* Assume latency(MULT) >= latency(ADD).  */
	  if (lat_mul - lat_mul_new >= lat_add_new - cycles_best)

      while (width > width_min)
	  int width_mid = (width + width_min) / 2;

	  if (get_required_cycles (ops_num, width_mid) == cycles_best)
	  else if (width_min < width_mid)
	    width_min = width_mid;

  /* If there's loop dependent FMA result, return width=2 to avoid it.  This is
     better than skipping these FMA candidates in widening_mul.  */
      && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
		   param_avoid_fma_max_bits))
      /* Look for cross backedge dependency:
	 1. LHS is a phi argument in the same basic block it is defined.
	 2. And the result of the phi node is used in OPS.  */
      basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));

      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
	if (gphi *phi = dyn_cast <gphi *> (USE_STMT (use_p)))
	    if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
	    tree phi_result = gimple_phi_result (phi);
	    FOR_EACH_VEC_ELT (*ops, j, oe)
		if (TREE_CODE (oe->op) != SSA_NAME)
		/* Result of phi is operand of PLUS_EXPR.  */
		if (oe->op == phi_result)
		/* Check if result of phi is operand of MULT_EXPR.  */
		gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
		if (is_gimple_assign (def_stmt)
		    && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
		    tree rhs = gimple_assign_rhs1 (def_stmt);
		    if (TREE_CODE (rhs) == SSA_NAME)
			if (rhs == phi_result)
			def_stmt = SSA_NAME_DEF_STMT (rhs);
		if (is_gimple_assign (def_stmt)
		    && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
		    if (gimple_assign_rhs1 (def_stmt) == phi_result
			|| gimple_assign_rhs2 (def_stmt) == phi_result)
#define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops.  */
#define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops.  */
#define NORMAL_END_STMT 2 /* It is the end stmt of normal ops.  */

/* Rewrite statements with dependency chain with regard to the chance to
   generate FMA.
   For the chain with FMA: Try to keep fma opportunity as much as possible.
   For the chain without FMA: Putting the computation in rank order and trying
   to allow operations to be executed in parallel.
   E.g.
     e + f + a * b + c * d;

   This reassociation approach preserves the chance of fma generation as much
   as possible.

   Another thing is to avoid adding loop-carried ops to long chains, otherwise
   the whole chain will have dependencies across the loop iteration.  Just keep
   loop-carried ops in a separate chain.
   E.g.
     x_1 = phi (x_0, x_2)
     y_1 = phi (y_0, y_2)

     a + b + c + d + e + x1 + y1
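
   One possible shape of the rewrite for the first example above (a
   sketch; the exact SSA names and grouping depend on WIDTH and on
   whether FMA candidates are present):

     _1 = a * b;
     _2 = c * d;
     _3 = e + f;
     _4 = _3 + _1;	// can become _4 = .FMA (a, b, _3)
     _5 = _4 + _2;	// can become _5 = .FMA (c, d, _4)

   and the loop-carried x1/y1 operands from the second example are kept
   in their own chain that is attached at the very end.  */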
rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
			    const vec<operand_entry *> &ops)
  enum tree_code opcode = gimple_assign_rhs_code (stmt);
  int op_num = ops.length ();
  int op_normal_num = op_num;
  gcc_assert (op_num > 0);
  int stmt_num = op_num - 1;
  gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
  tree tmp_op[2], op1;
  gimple *stmt1 = NULL;
  tree last_rhs1 = gimple_assign_rhs1 (stmt);
  int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
  int width_active = 0, width_count = 0;
  bool has_biased = false, ops_changed = false;
  auto_vec<operand_entry *> ops_normal;
  auto_vec<operand_entry *> ops_biased;
  vec<operand_entry *> *ops1;

  /* We start expression rewriting from the top statements.
     So, in this loop we create a full list of statements
     we will work with.  */
  stmts[stmt_num - 1] = stmt;
  for (i = stmt_num - 2; i >= 0; i--)
    stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));

  /* Avoid adding loop-carried ops to long chains, first filter out the
     loop-carried.  But we need to make sure that the length of the remainder
     is not less than 4, which is the smallest ops length we can break the
     chain.  */
  FOR_EACH_VEC_ELT (ops, i, oe)
      if (TREE_CODE (oe->op) == SSA_NAME
	  && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
	  && op_normal_num > 4)
	  ops_biased.safe_push (oe);
	ops_normal.safe_push (oe);

  /* Width should not be larger than ops length / 2, since we can not create
     more parallel dependency chains that exceeds such value.  */
  int width_normal = op_normal_num / 2;
  int width_biased = (op_num - op_normal_num) / 2;
  width_normal = width <= width_normal ? width : width_normal;
  width_biased = width <= width_biased ? width : width_biased;

  width_count = width_active = width_normal;

  /* Build parallel dependency chain according to width.  */
  for (i = 0; i < stmt_num; i++)
      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmts[i], 0);

      /* When the work of normal ops is over, but the loop is not over,
	 continue to do biased ops.  */
      if (width_count == 0 && ops1 == &ops_normal)
	  width_count = width_active = width_biased;

      /* Swap the operands if no FMA in the chain.  */
      if (ops1->length () > 2 && !has_fma)
	swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);

      if (i < width_active
	  || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
	  for (j = 0; j < 2; j++)
	      /* If the stmt that defines operand has to be inserted, insert it
		 before the use.  */
	      stmt1 = oe->stmt_to_insert;
		insert_stmt_before_use (stmts[i], stmt1);
	  stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
	  gimple_set_visited (stmts[i], true);

	  /* We keep original statement only for the last one.  All others are
	     recreated.  */
	  if (!ops1->length ())
	      /* For biased length equal to 2.  */
	      if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
		last_rhs2_stmt_index = i - 1;

	      /* When width_count == 2 and there is no biased, just finish.  */
	      if (width_count == NORMAL_END_STMT && !has_biased)
		  last_rhs1_stmt_index = i - 1;
		  last_rhs2_stmt_index = i - 2;
	      if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
		  /* We keep original statement only for the last one.  All
		     others are recreated.  */
		  gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
					  (stmts[last_rhs1_stmt_index]));
		  gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
					  (stmts[last_rhs2_stmt_index]));
		  update_stmt (stmts[i]);
		    build_and_add_sum (TREE_TYPE (last_rhs1),
				       gimple_assign_lhs (stmts[i-width_count]),
				       (stmts[i-width_count+1]),
		  gimple_set_visited (stmts[i], true);

	      /* It is the end of normal or biased ops.
		 last_rhs1_stmt_index used to record the last stmt index
		 for normal ops.  last_rhs2_stmt_index used to record the
		 last stmt index for biased ops.  */
	      if (width_count == BIASED_END_STMT)
		  gcc_assert (has_biased);
		  if (ops_biased.length ())
		    last_rhs1_stmt_index = i;
		    last_rhs2_stmt_index = i;
	  /* Attach the rest ops to the parallel dependency chain.  */
	  stmt1 = oe->stmt_to_insert;
	    insert_stmt_before_use (stmts[i], stmt1);

	  /* For only one biased ops.  */
	  if (width_count == SPECIAL_BIASED_END_STMT)
	      /* We keep original statement only for the last one.  All
		 others are recreated.  */
	      gcc_assert (has_biased);
	      gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
				      (stmts[last_rhs1_stmt_index]));
	      gimple_assign_set_rhs2 (stmts[i], op1);
	      update_stmt (stmts[i]);
	      stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
					    (stmts[i-width_active]),
	      gimple_set_visited (stmts[i], true);

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmts[i], 0);

  remove_visited_stmt_chain (last_rhs1);
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

linearize_expr (gimple *stmt)
  gimple_stmt_iterator gsi;
  gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  gimple *oldbinrhs = binrhs;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple *newbinrhs = NULL;
  class loop *loop = loop_containing_stmt (stmt);
  tree lhs = gimple_assign_lhs (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  gsi = gsi_for_stmt (stmt);

  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
				gimple_assign_rhs_code (binrhs),
				gimple_assign_lhs (binlhs),
				gimple_assign_rhs2 (binrhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
  gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
  gimple_set_uid (binrhs, gimple_uid (stmt));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0);

  reassociate_stats.linearized++;

  gsi = gsi_for_stmt (oldbinrhs);
  reassoc_remove_stmt (&gsi);
  release_defs (oldbinrhs);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
       want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
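
/* For illustration (made-up SSA names): starting from
     t1_1 = A + B;
     t2_2 = C + D;
     x_3 = t1_1 + t2_2;
   a fresh statement t3_4 = t1_1 + D is inserted before x_3 and x_3 is
   rewritten to t3_4 + C, so the tree becomes left-linear; the old
   t2_2 definition is then removed.  */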
/* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
   it.  Otherwise, return NULL.  */

get_single_immediate_use (tree lhs)
  use_operand_p immuse;

  if (TREE_CODE (lhs) == SSA_NAME
      && single_imm_use (lhs, &immuse, &immusestmt)
      && is_gimple_assign (immusestmt))
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

negate_value (tree tonegate, gimple_stmt_iterator *gsip)
  gimple *negatedefstmt = NULL;
  tree resultofnegate;
  gimple_stmt_iterator gsi;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
      tree lhs = gimple_assign_lhs (negatedefstmt);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);

      gsi = gsi_for_stmt (negatedefstmt);
      lhs = make_ssa_name (TREE_TYPE (lhs));
      gimple_set_visited (negatedefstmt, true);
      g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
      gimple_set_uid (g, gimple_uid (negatedefstmt));
      gsi_insert_before (&gsi, g, GSI_SAME_STMT);

  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  uid = gimple_uid (gsi_stmt (gsi));
  for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
      gimple *stmt = gsi_stmt (gsi);
      if (gimple_uid (stmt) != 0)
      gimple_set_uid (stmt, uid);
  return resultofnegate;
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */

should_break_up_subtract (gimple *stmt)
  tree lhs = gimple_assign_lhs (stmt);
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  class loop *loop = loop_containing_stmt (stmt);

  if (TREE_CODE (binlhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))

  if (TREE_CODE (binrhs) == SSA_NAME
      && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))

  if (TREE_CODE (lhs) == SSA_NAME
      && (immusestmt = get_single_immediate_use (lhs))
      && is_gimple_assign (immusestmt)
      && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
	  || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
	      && gimple_assign_rhs1 (immusestmt) == lhs)
	  || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
/* Transform STMT from A - B into A + -B.  */

break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
      fprintf (dump_file, "Breaking up subtract ");
      print_gimple_stmt (dump_file, stmt, 0);

  rhs2 = negate_value (rhs2, gsip);
  gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
/* Determine whether STMT is a builtin call that raises an SSA name
   to an integer power and has only one use.  If so, and this is early
   reassociation and unsafe math optimizations are permitted, place
   the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
   If any of these conditions does not hold, return FALSE.  */

acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
  REAL_VALUE_TYPE c, cint;

  switch (gimple_call_combined_fn (stmt))
      if (flag_errno_math)

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (TREE_CODE (arg1) != REAL_CST)

      c = TREE_REAL_CST (arg1);

      if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)

      *exponent = real_to_integer (&c);
      real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
      if (!real_identical (&c, &cint))

      *base = gimple_call_arg (stmt, 0);
      arg1 = gimple_call_arg (stmt, 1);

      if (!tree_fits_shwi_p (arg1))

      *exponent = tree_to_shwi (arg1);

  /* Expanding negative exponents is generally unproductive, so we don't
     complicate matters with those.  Exponents of zero and one should
     have been handled by expression folding.  */
  if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
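
/* For illustration: with the errno and unsafe-math preconditions met,
   a call such as t_5 = __builtin_pow (x_3, 4.0) whose result has a
   single use yields *BASE = x_3 and *EXPONENT = 4, while an exponent
   like 0.5 or a negative value is rejected (it either does not
   round-trip through an integer or is smaller than 2).  */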
/* Try to derive and add operand entry for OP to *OPS.  Return false if
   unsuccessful.  */

try_special_add_to_ops (vec<operand_entry *> *ops,
			enum tree_code code,
			tree op, gimple *def_stmt)
  tree base = NULL_TREE;
  HOST_WIDE_INT exponent = 0;

  if (TREE_CODE (op) != SSA_NAME
      || ! has_single_use (op))

  if (code == MULT_EXPR
      && reassoc_insert_powi_p
      && flag_unsafe_math_optimizations
      && is_gimple_call (def_stmt)
      && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
      add_repeat_to_ops_vec (ops, base, exponent);
      gimple_set_visited (def_stmt, true);
  else if (code == MULT_EXPR
	   && is_gimple_assign (def_stmt)
	   && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
	   && !HONOR_SNANS (TREE_TYPE (op))
	   && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
	       || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
	   && (!FLOAT_TYPE_P (TREE_TYPE (op))
	       || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      tree cst = build_minus_one_cst (TREE_TYPE (op));
      add_to_ops_vec (ops, rhs1);
      add_to_ops_vec (ops, cst);
      gimple_set_visited (def_stmt, true);
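
/* For illustration of the second case above: in a MULT_EXPR chain an
   operand defined as t_4 = -x_2 is replaced by the two entries x_2 and
   -1, so the negation can later be folded with other constants or
   repropagated instead of blocking the reassociation.  */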
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */

linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
		     bool is_associative, bool set_visited)
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple *binlhsdef = NULL, *binrhsdef = NULL;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  class loop *loop = loop_containing_stmt (stmt);

    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
			 && !stmt_could_throw_p (cfun, binlhsdef));

  if (TREE_CODE (binrhs) == SSA_NAME)
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
			 && !stmt_could_throw_p (cfun, binrhsdef));

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
      /* If this is not an associative operation like division, give up.  */
      if (!is_associative)
	  add_to_ops_vec (ops, binrhs);

      if (!binrhsisreassoc)
	  if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
	    /* If we add ops for the rhs we expect to be able to recurse
	       to it via the lhs during expression rewrite so swap
	       operands.  */
	    add_to_ops_vec (ops, binrhs);

	  if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
	    add_to_ops_vec (ops, binlhs);

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0);

      swap_ssa_operands (stmt,
			 gimple_assign_rhs1_ptr (stmt),
			 gimple_assign_rhs2_ptr (stmt));

      if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0);
      if (!binrhsisreassoc)

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      std::swap (binlhs, binrhs);
  else if (binrhsisreassoc)
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);

  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);

  if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
    add_to_ops_vec (ops, binrhs);
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  */

repropagate_negates (void)
  FOR_EACH_VEC_ELT (plus_negates, i, negate)
      gimple *user = get_single_immediate_use (negate);
      if (!user || !is_gimple_assign (user))

      tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
      if (TREE_CODE (negateop) == SSA_NAME
	  && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (gimple_assign_rhs_code (user) == PLUS_EXPR)
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	      swap_ssa_operands (user,
				 gimple_assign_rhs1_ptr (user),
				 gimple_assign_rhs2_ptr (user));

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
      else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
	  if (gimple_assign_rhs1 (user) == negate)

	      /* ... which we transform into ...

		 This pushes down the negate which we possibly can merge
		 into some other operation, hence insert it into the
		 plus_negates vector.  */
	      gimple *feed = SSA_NAME_DEF_STMT (negate);
	      tree b = gimple_assign_rhs2 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (feed);
	      gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
	      tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
	      gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
	      gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
	      user = gsi_stmt (gsi2);

	      reassoc_remove_stmt (&gsi);
	      release_defs (feed);
	      plus_negates.safe_push (gimple_assign_lhs (user));

	      /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
		 getting rid of one operation.  */
	      tree rhs1 = gimple_assign_rhs1 (user);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
	      update_stmt (gsi_stmt (gsi));
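
/* For illustration of the PLUS_EXPR case (made-up SSA names):
     t_4 = -b_2;
     y_5 = a_1 + t_4;
   is rewritten into
     y_5 = a_1 - b_2;
   i.e. the negate is propagated back into a subtract.  */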
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */

break_up_subtract_bb (basic_block bb)
  gimple_stmt_iterator gsi;
  unsigned int uid = 1;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);
      gimple_set_uid (stmt, uid++);

      if (!is_gimple_assign (stmt)
	  || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
	  || !can_reassociate_op_p (gimple_assign_lhs (stmt)))

      /* Look for simple gimple subtract operations.  */
      if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	  if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
	      || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
      else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
	       && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
	plus_negates.safe_push (gimple_assign_lhs (stmt));
/* Used for repeated factor analysis.  */
struct repeat_factor
  /* An SSA name that occurs in a multiply chain.  */

  /* Cached rank of the factor.  */

  /* Number of occurrences of the factor in the chain.  */
  HOST_WIDE_INT count;

  /* An SSA name representing the product of this factor and
     all factors appearing later in the repeated factor vector.  */

static vec<repeat_factor> repeat_factor_vec;

/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

compare_repeat_factors (const void *x1, const void *x2)
  const repeat_factor *rf1 = (const repeat_factor *) x1;
  const repeat_factor *rf2 = (const repeat_factor *) x2;

  if (rf1->count < rf2->count)
  else if (rf1->count > rf2->count)

  if (rf1->rank < rf2->rank)
  else if (rf1->rank > rf2->rank)
6424 /* Look for repeated operands in OPS in the multiply tree rooted at
6425 STMT. Replace them with an optimal sequence of multiplies and powi
6426 builtin calls, and remove the used operands from OPS. Return an
6427 SSA name representing the value of the replacement sequence. */
static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target, unless type is integral.  */
  if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->factor == oe->op)
                {
                  rf1->count += oe->count;
                  break;
                }
            }

          if (j >= repeat_factor_vec.length ())
            {
              rfnew.factor = oe->op;
              rfnew.rank = oe->rank;
              rfnew.count = oe->count;
              rfnew.repr = NULL_TREE;
              repeat_factor_vec.safe_push (rfnew);
            }
        }
    }
  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);
  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions.  If we instead ignored the cached
     y * z and first multiplied by the __builtin_pow opportunity
     z * z, we would need 6 multiplies; both alternatives are sketched
     in the comment below.  */
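  /* A sketch of the two alternatives discussed above (the temporary names
     are illustrative only).  Using the cached y * z:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = t1 * t3
	result = t4 * z

     Ignoring the cached y * z and using the z * z opportunity first:

	t1 = y * z
	t2 = t1 * x
	t3 = t2 * t2
	t4 = z * z
	t5 = t3 * t4
	result = t5 * y

     i.e. 5 multiplies versus 6; both compute x^2 * y^3 * z^4.  */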
  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
         preceding iterations.  If found, create a builtin_powi for
         it if the minimum occurrence count for its factors is at
         least 2, or just use this cached product as our next
         multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
        {
          if (rf1->repr && rf1->count > 0)
            break;
        }

      if (j < vec_len)
        {
          power = rf1->count;

          if (power == 1)
            {
              iter_result = rf1->repr;

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Multiplying by cached product ", dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fputs ("\n", dump_file);
                }
            }
          else
            {
              if (INTEGRAL_TYPE_P (type))
                {
                  gcc_assert (power > 1);
                  gimple_stmt_iterator gsip = gsi;
                  gsi_prev (&gsip);
                  iter_result = powi_as_mults (&gsi, gimple_location (stmt),
                                               rf1->repr, power);
                  gimple_stmt_iterator gsic = gsi;
                  while (gsi_stmt (gsic) != gsi_stmt (gsip))
                    {
                      gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
                      gimple_set_visited (gsi_stmt (gsic), true);
                      gsi_prev (&gsic);
                    }
                }
              else
                {
                  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
                  pow_stmt
                    = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                         build_int_cst (integer_type_node,
                                                        power));
                  gimple_call_set_lhs (pow_stmt, iter_result);
                  gimple_set_location (pow_stmt, gimple_location (stmt));
                  gimple_set_uid (pow_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
                }

              if (dump_file && (dump_flags & TDF_DETAILS))
                {
                  unsigned elt;
                  repeat_factor *rf;
                  fputs ("Building __builtin_pow call for cached product (",
                         dump_file);
                  for (elt = j; elt < vec_len; elt++)
                    {
                      rf = &repeat_factor_vec[elt];
                      print_generic_expr (dump_file, rf->factor);
                      if (elt < vec_len - 1)
                        fputs (" * ", dump_file);
                    }
                  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
                           power);
                }
            }
        }
      else
        {
          /* Otherwise, find the first factor in the repeated factor
             vector whose occurrence count is at least 2.  If no such
             factor exists, there are no builtin_powi opportunities
             remaining, and we are done.  */
          FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
            {
              if (rf1->count >= 2)
                break;
            }

          if (j >= vec_len)
            break;

          power = rf1->count;

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              unsigned elt;
              repeat_factor *rf;
              fputs ("Building __builtin_pow call for (", dump_file);
              for (elt = j; elt < vec_len; elt++)
                {
                  rf = &repeat_factor_vec[elt];
                  print_generic_expr (dump_file, rf->factor);
                  if (elt < vec_len - 1)
                    fputs (" * ", dump_file);
                }
              fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
            }

          reassociate_stats.pows_created++;

          /* Visit each element of the vector in reverse order (so that
             high-occurrence elements are visited first, and within the
             same occurrence count, lower-ranked elements are visited
             first).  Form a linear product of all elements in this order
             whose occurrence count is at least that of element J.
             Record the SSA name representing the product of each element
             with all subsequent elements in the vector.  */
          if (j == vec_len - 1)
            rf1->repr = rf1->factor;
          else
            {
              for (ii = vec_len - 2; ii >= (int) j; ii--)
                {
                  tree op1, op2;

                  rf1 = &repeat_factor_vec[ii];
                  rf2 = &repeat_factor_vec[ii + 1];

                  /* Init the last factor's representative to be itself.  */
                  if (!rf2->repr)
                    {
                      rf2->repr = rf2->factor;
                      op2 = rf2->factor;
                    }
                  else
                    op2 = rf2->repr;

                  op1 = rf1->factor;
                  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
                  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
                                                  op1, op2);
                  gimple_set_location (mul_stmt, gimple_location (stmt));
                  gimple_set_uid (mul_stmt, gimple_uid (stmt));
                  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
                  rf1->repr = target_ssa;

                  /* Don't reprocess the multiply we just introduced.  */
                  gimple_set_visited (mul_stmt, true);
                }
            }

          /* Form a call to __builtin_powi for the maximum product
             just formed, raised to the power obtained earlier.  */
          rf1 = &repeat_factor_vec[j];
          if (INTEGRAL_TYPE_P (type))
            {
              gcc_assert (power > 1);
              gimple_stmt_iterator gsip = gsi;
              gsi_prev (&gsip);
              iter_result = powi_as_mults (&gsi, gimple_location (stmt),
                                           rf1->repr, power);
              gimple_stmt_iterator gsic = gsi;
              while (gsi_stmt (gsic) != gsi_stmt (gsip))
                {
                  gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
                  gimple_set_visited (gsi_stmt (gsic), true);
                  gsi_prev (&gsic);
                }
            }
          else
            {
              iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
              pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
                                            build_int_cst (integer_type_node,
                                                           power));
              gimple_call_set_lhs (pow_stmt, iter_result);
              gimple_set_location (pow_stmt, gimple_location (stmt));
              gimple_set_uid (pow_stmt, gimple_uid (stmt));
              gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
            }
        }

      /* If we previously formed at least one other builtin_powi call,
         form the product of this one and those others.  */
      if (result)
        {
          tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
          mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
                                          result, iter_result);
          gimple_set_location (mul_stmt, gimple_location (stmt));
          gimple_set_uid (mul_stmt, gimple_uid (stmt));
          gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
          gimple_set_visited (mul_stmt, true);
          result = new_result;
        }
      else
        result = iter_result;

      /* Decrement the occurrence count of each element in the product
         by the count found above, and remove this many copies of each
         factor from OPS.  */
      for (i = j; i < vec_len; i++)
        {
          unsigned n;

          rf1 = &repeat_factor_vec[i];
          rf1->count -= power;

          FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
            if (oe->op == rf1->factor)
              ops->ordered_remove (n);
        }
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
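/* A note by way of illustration (temporary names invented here): for an
   integral TYPE there is no __builtin_powi to call, so the powers built
   above are expanded inline through powi_as_mults instead.  E.g. a
   repeated factor x with occurrence count 4 becomes

     t1 = x * x;
     reassocpow_1 = t1 * t1;

   rather than a call.  */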
/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */
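/* For example (values chosen for illustration only):

     2.5 * copysign (3.0, y)   becomes    copysign (7.5, y)
    -2.0 * copysign (3.0, y)   becomes   -copysign (-6.0, y)

   and in both cases the right-hand side computes exactly the same value
   as the original product.  */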
static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
          && has_single_use (oe->op))
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
            {
              tree arg0, arg1;
              switch (gimple_call_combined_fn (old_call))
                {
                CASE_CFN_COPYSIGN_FN:
                  arg0 = gimple_call_arg (old_call, 0);
                  arg1 = gimple_call_arg (old_call, 1);
                  /* The first argument of copysign must be a constant,
                     otherwise there's nothing to do.  */
                  if (TREE_CODE (arg0) == REAL_CST)
                    {
                      tree type = TREE_TYPE (arg0);
                      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
                      /* If we couldn't fold to a single constant, skip it.
                         That happens e.g. for inexact multiplication when
                         -frounding-math.  */
                      if (mul == NULL_TREE)
                        break;
                      /* Instead of adjusting OLD_CALL, let's build a new
                         call to not leak the LHS and prevent keeping bogus
                         debug statements.  DCE will clean up the old call.  */
                      gcall *new_call;
                      if (gimple_call_internal_p (old_call))
                        new_call = gimple_build_call_internal
                          (IFN_COPYSIGN, 2, mul, arg1);
                      else
                        new_call = gimple_build_call
                          (gimple_call_fndecl (old_call), 2, mul, arg1);
                      tree lhs = make_ssa_name (type);
                      gimple_call_set_lhs (new_call, lhs);
                      gimple_set_location (new_call,
                                           gimple_location (old_call));
                      insert_stmt_after (new_call, old_call);
                      /* We've used the constant, get rid of it.  */
                      ops->pop ();
                      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
                      /* Handle the CST1 < 0 case by negating the result.  */
                      if (cst1_neg)
                        {
                          tree negrhs = make_ssa_name (TREE_TYPE (lhs));
                          gimple *negate_stmt
                            = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
                          insert_stmt_after (negate_stmt, new_call);
                          oe->op = negrhs;
                        }
                      else
                        oe->op = lhs;
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file, "Optimizing copysign: ");
                          print_generic_expr (dump_file, cst);
                          fprintf (dump_file, " * COPYSIGN (");
                          print_generic_expr (dump_file, arg0);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, ") into %sCOPYSIGN (",
                                   cst1_neg ? "-" : "");
                          print_generic_expr (dump_file, mul);
                          fprintf (dump_file, ", ");
                          print_generic_expr (dump_file, arg1);
                          fprintf (dump_file, "\n");
                        }
                      return;
                    }
                  break;
                default:
                  break;
                }
            }
        }
    }
}
/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
                            tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}
/* Rearranging ops may yield more FMAs when the chain contains more than
   two multiplications.  Put no-mult ops and mult ops alternately at the
   end of the queue, which is conducive to generating more FMAs and to
   reducing the loss of FMAs when breaking the chain.  E.g.

   a * b + c * d + e generates:

   _4 = c_9(D) * d_10(D);
   _12 = .FMA (a_7(D), b_8(D), _4);
   _11 = e_6(D) + _12;

   Rearranging the ops to e + a * b + c * d generates:

   _4 = .FMA (c_7(D), d_8(D), _3);
   _11 = .FMA (a_5(D), b_6(D), _4);

   Return the number of MULT_EXPRs in the chain.  */
static unsigned int
rank_ops_for_fma (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int ops_length = ops->length ();
  auto_vec<operand_entry *> ops_mult;
  auto_vec<operand_entry *> ops_others;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
        {
          gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
          if (is_gimple_assign (def_stmt))
            {
              if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
                ops_mult.safe_push (oe);
              /* A negate on the multiplication leads to FNMA.  */
              else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
                       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
                {
                  gimple *neg_def_stmt
                    = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
                  if (is_gimple_assign (neg_def_stmt)
                      && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
                      && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
                    ops_mult.safe_push (oe);
                  else
                    ops_others.safe_push (oe);
                }
              else
                ops_others.safe_push (oe);
            }
          else
            ops_others.safe_push (oe);
        }
      else
        ops_others.safe_push (oe);
    }

  /* 1. When ops_mult.length == 2, like the following case,

        a * b + c * d + e,

     we need to rearrange the ops.

     Putting ops that are not defined by a mult in front can generate
     more FMAs.

     2. If all ops are defined with mult, we don't need to rearrange them.  */
  unsigned mult_num = ops_mult.length ();
  if (mult_num >= 2 && mult_num != ops_length)
    {
      /* Put no-mult ops and mult ops alternately at the end of the
         queue, which is conducive to generating more FMA and reducing the
         loss of FMA when breaking the chain.  */
      ops->truncate (0);
      ops->splice (ops_mult);
      int j, opindex = ops->length ();
      int others_length = ops_others.length ();
      for (j = 0; j < others_length; j++)
        {
          oe = ops_others.pop ();
          ops->quick_insert (opindex, oe);
          if (opindex > 0)
            opindex--;
        }
    }
  return mult_num;
}
/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests.  */
static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple *stmt = last_nondebug_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  bool do_prev = false;
  for (gsi = gsi_last_bb (bb);
       !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
    {
      do_prev = true;
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
          && !stmt_could_throw_p (cfun, stmt))
        {
          tree lhs, rhs1, rhs2;
          enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

          /* If this was part of an already processed statement,
             we don't need to touch it again.  */
          if (gimple_visited_p (stmt))
            {
              /* This statement might have become dead because of previous
                 reassociations.  */
              if (has_zero_uses (gimple_get_lhs (stmt)))
                {
                  reassoc_remove_stmt (&gsi);
                  release_defs (stmt);
                  /* We might end up removing the last stmt above which
                     places the iterator to the end of the sequence.
                     Reset it to the last stmt in this case and make sure
                     we don't do gsi_prev in that case.  */
                  if (gsi_end_p (gsi))
                    {
                      gsi = gsi_last_bb (bb);
                      do_prev = false;
                    }
                }
              continue;
            }

          /* If this is not a gimple binary expression, there is
             nothing for us to do with it.  */
          if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
            continue;

          lhs = gimple_assign_lhs (stmt);
          rhs1 = gimple_assign_rhs1 (stmt);
          rhs2 = gimple_assign_rhs2 (stmt);

          /* For non-bit or min/max operations we can't associate
             all types.  Verify that here.  */
          if ((rhs_code != BIT_IOR_EXPR
               && rhs_code != BIT_AND_EXPR
               && rhs_code != BIT_XOR_EXPR
               && rhs_code != MIN_EXPR
               && rhs_code != MAX_EXPR
               && !can_reassociate_type_p (TREE_TYPE (lhs)))
              || !can_reassociate_op_p (rhs1)
              || !can_reassociate_op_p (rhs2))
            continue;

          if (associative_tree_code (rhs_code))
            {
              auto_vec<operand_entry *> ops;
              tree powi_result = NULL_TREE;
              bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

              /* There may be no immediate uses left by the time we
                 get here because we may have eliminated them all.  */
              if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
                continue;

              gimple_set_visited (stmt, true);
              linearize_expr_tree (&ops, stmt, true, true);
              ops.qsort (sort_by_operand_rank);
              int orig_len = ops.length ();
              optimize_ops_list (rhs_code, &ops);
              if (undistribute_ops_list (rhs_code, &ops,
                                         loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }
              if (undistribute_bitref_for_vector (rhs_code, &ops,
                                                  loop_containing_stmt (stmt)))
                {
                  ops.qsort (sort_by_operand_rank);
                  optimize_ops_list (rhs_code, &ops);
                }
              if (rhs_code == PLUS_EXPR
                  && transform_add_to_multiply (&ops))
                ops.qsort (sort_by_operand_rank);

              if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
                {
                  if (is_vector)
                    optimize_vec_cond_expr (rhs_code, &ops);
                  else
                    optimize_range_tests (rhs_code, &ops, NULL);
                }

              if (rhs_code == MULT_EXPR && !is_vector)
                {
                  attempt_builtin_copysign (&ops);

                  if (reassoc_insert_powi_p
                      && (flag_unsafe_math_optimizations
                          || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
                    powi_result = attempt_builtin_powi (stmt, &ops);
                }

              operand_entry *last;
              bool negate_result = false;
              if (ops.length () > 1
                  && rhs_code == MULT_EXPR)
                {
                  last = ops.last ();
                  if ((integer_minus_onep (last->op)
                       || real_minus_onep (last->op))
                      && !HONOR_SNANS (TREE_TYPE (lhs))
                      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
                          || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
                    {
                      ops.pop ();
                      negate_result = true;
                    }
                }

              tree new_lhs = lhs;
              /* If the operand vector is now empty, all operands were
                 consumed by the __builtin_powi optimization.  */
              if (ops.length () == 0)
                transform_stmt_to_copy (&gsi, stmt, powi_result);
              else if (ops.length () == 1)
                {
                  tree last_op = ops.last ()->op;

                  /* If the stmt that defines operand has to be inserted,
                     insert it before the use.  */
                  if (ops.last ()->stmt_to_insert)
                    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
                  if (powi_result)
                    transform_stmt_to_multiply (&gsi, stmt, last_op,
                                                powi_result);
                  else
                    transform_stmt_to_copy (&gsi, stmt, last_op);
                }
              else
                {
                  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
                  int ops_num = ops.length ();
                  int width = 0;
                  unsigned mult_num = 0;

                  /* For binary bit operations, if there are at least 3
                     operands and the last operand in OPS is a constant,
                     move it to the front.  This helps ensure that we generate
                     (X & Y) & C rather than (X & C) & Y.  The former will
                     often match a canonical bit test when we get to RTL.  */
                  if (ops.length () > 2
                      && (rhs_code == BIT_AND_EXPR
                          || rhs_code == BIT_IOR_EXPR
                          || rhs_code == BIT_XOR_EXPR)
                      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
                    std::swap (*ops[0], *ops[ops_num - 1]);

                  optimization_type opt_type = bb_optimization_type (bb);

                  /* If the target supports FMA, rank_ops_for_fma will detect
                     if the chain has fmas and rearrange the ops if so.  */
                  if (direct_internal_fn_supported_p (IFN_FMA,
                                                      TREE_TYPE (lhs),
                                                      opt_type)
                      && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
                    mult_num = rank_ops_for_fma (&ops);

                  /* Only rewrite the expression tree to parallel in the
                     last reassoc pass to avoid useless work back-and-forth
                     with initial linearization.  */
                  bool has_fma = mult_num >= 2 && mult_num != ops_num;
                  if (!reassoc_insert_powi_p
                      && ops.length () > 3
                      && (width = get_reassociation_width (&ops, mult_num, lhs,
                                                           rhs_code, mode))
                         > 1)
                    {
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        fprintf (dump_file,
                                 "Width = %d was chosen for reassociation\n",
                                 width);
                      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
                                                  width, has_fma, ops);
                    }
                  else
                    {
                      /* When there are three operands left, we want
                         to make sure the ones that get the double
                         binary op are chosen wisely.  */
                      int len = ops.length ();
                      if (len >= 3
                          /* width > 1 means ranking ops results in better
                             parallelism.  Check current value to avoid
                             calling get_reassociation_width again.  */
                          && width != 1
                          && get_reassociation_width (
                               &ops, mult_num, lhs, rhs_code, mode) > 1)
                        swap_ops_for_binary_stmt (ops, len - 3);

                      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
                                                   powi_result != NULL_TREE
                                                   || negate_result,
                                                   len != orig_len);
                    }

                  /* If we combined some repeated factors into a
                     __builtin_powi call, multiply that result by the
                     reassociated operands.  */
                  if (powi_result)
                    {
                      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
                      tree type = TREE_TYPE (lhs);
                      tree target_ssa = make_temp_ssa_name (type, NULL,
                                                            "reassocpow");
                      gimple_set_lhs (lhs_stmt, target_ssa);
                      update_stmt (lhs_stmt);
                      if (lhs != new_lhs)
                        {
                          target_ssa = new_lhs;
                          new_lhs = lhs;
                        }
                      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
                                                      powi_result, target_ssa);
                      gimple_set_location (mul_stmt, gimple_location (stmt));
                      gimple_set_uid (mul_stmt, gimple_uid (stmt));
                      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
                    }
                }

              if (negate_result)
                {
                  stmt = SSA_NAME_DEF_STMT (lhs);
                  tree tmp = make_ssa_name (TREE_TYPE (lhs));
                  gimple_set_lhs (stmt, tmp);
                  if (lhs != new_lhs)
                    tmp = new_lhs;
                  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
                                                           tmp);
                  gimple_set_uid (neg_stmt, gimple_uid (stmt));
                  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
                  update_stmt (stmt);
                }
            }
        }
    }

  return cfg_cleanup_needed;
}
/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
                  && is_gimple_assign (use_stmt)
                  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
                  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
                                     build_zero_cst (TREE_TYPE (var)),
                                     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
        othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
        othervar = gimple_assign_rhs1 (use_stmt);
      else
        gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}
void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}
/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc (void)
{
  bool cfg_cleanup_needed = false;
  basic_block *worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  int sp = 0;
  for (auto son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
       son; son = next_dom_son (CDI_DOMINATORS, son))
    worklist[sp++] = son;
  while (sp)
    {
      basic_block bb = worklist[--sp];
      break_up_subtract_bb (bb);
      for (auto son = first_dom_son (CDI_DOMINATORS, bb);
           son; son = next_dom_son (CDI_DOMINATORS, son))
        worklist[sp++] = son;
    }

  sp = 0;
  for (auto son = first_dom_son (CDI_POST_DOMINATORS,
                                 EXIT_BLOCK_PTR_FOR_FN (cfun));
       son; son = next_dom_son (CDI_POST_DOMINATORS, son))
    worklist[sp++] = son;
  while (sp)
    {
      basic_block bb = worklist[--sp];
      cfg_cleanup_needed |= reassociate_bb (bb);
      for (auto son = first_dom_son (CDI_POST_DOMINATORS, bb);
           son; son = next_dom_son (CDI_POST_DOMINATORS, son))
        worklist[sp++] = son;
    }

  free (worklist);
  return cfg_cleanup_needed;
}
/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  int64_t rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, int64_t>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
        insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB.  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
  mark_ssa_maybe_undefs ();
}
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
                            reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
                            reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
                            reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
                            reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
                            reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
                            reassociate_stats.pows_created);

  delete operand_rank;
  bitmap_clear (biased_names);
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cleanup_cfg if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
{
  reassoc_insert_powi_p = insert_powi_p;
  reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}
namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param) final override
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
      bias_loop_carried_phi_ranks_p = !param;
    }
  bool gate (function *) final override { return flag_tree_reassoc != 0; }
  unsigned int execute (function *) final override
  {
    return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
  }

private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
  bool bias_loop_carried_phi_ranks_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}