/* Reassociation for trees.
   Copyright (C) 2005-2025 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "gimple-pretty-print.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "gimplify-me.h"
#include "tree-ssa-loop.h"
#include "langhooks.h"
#include "case-cfn-macros.h"
#include "tree-ssa-reassoc.h"
#include "tree-ssa-math-opts.h"
#include "gimple-range.h"
#include "internal-fn.h"
/* This is a simple global reassociation pass.  It is, in part, based
   on the LLVM pass of the same name (They do some things more/less
   than we do, in different orders, etc).

   It consists of five steps:

   1. Breaking up subtract operations into addition + negate, where
   it would promote the reassociation of adds.

   2. Left linearization of the expression trees, so that (A+B)+(C+D)
   becomes (((A+B)+C)+D), which is easier for us to rewrite later.
   During linearization, we place the operands of the binary
   expressions into a vector of operand_entry_*

   3. Optimization of the operand lists, eliminating things like a +
   -a, a & a, etc.

   3a. Combine repeated factors with the same occurrence counts
   into a __builtin_powi call that will later be optimized into
   an optimal number of multiplies.

   4. Rewrite the expression trees we linearized and optimized so
   they are in proper rank order.

   5. Repropagate negates, as nothing else will clean it up ATM.

   A bit of theory on #4, since nobody seems to write anything down
   about why it makes sense to do it the way they do it:

   We could do this much nicer theoretically, but don't (for reasons
   explained after how to do it theoretically nice :P).

   In order to promote the most redundancy elimination, you want
   binary expressions whose operands are the same rank (or
   preferably, the same value) exposed to the redundancy eliminator,
   for possible elimination.

   So the way to do this if we really cared, is to build the new op
   tree from the leaves to the roots, merging as you go, and putting the
   new op on the end of the worklist, until you are left with one
   thing on the worklist.

   IE if you have to rewrite the following set of operands (listed with
   rank in parentheses), with opcode PLUS_EXPR:

   a (1),  b (1),  c (1),  d (2), e (2)

   We start with our merge worklist empty, and the ops list with all of
   those on it.

   You want to first merge all leaves of the same rank, as much as
   possible.

   So first build a binary op of

   mergetmp = a + b, and put "mergetmp" on the merge worklist.

   Because there is no three operand form of PLUS_EXPR, c is not going to
   be exposed to redundancy elimination as a rank 1 operand.

   So you might as well throw it on the merge worklist (you could also
   consider it to now be a rank two operand, and merge it with d and e,
   but in this case, you then have evicted e from a binary op. So at
   least in this situation, you can't win.)

   Then build a binary op of d + e
   mergetmp2 = d + e

   and put mergetmp2 on the merge worklist.

   so merge worklist = {mergetmp, c, mergetmp2}

   Continue building binary ops of these operations until you have only
   one operation left on the worklist.

   So we have

   build binary op
   mergetmp3 = mergetmp + c

   worklist = {mergetmp2, mergetmp3}

   mergetmp4 = mergetmp2 + mergetmp3

   worklist = {mergetmp4}

   because we have one operation left, we can now just set the original
   statement equal to the result of that operation.

   This will at least expose a + b and d + e to redundancy elimination
   as binary operations.

   For extra points, you can reuse the old statements to build the
   mergetmps, since you shouldn't run out.

   So why don't we do this?

   Because it's expensive, and rarely will help.  Most trees we are
   reassociating have 3 or less ops.  If they have 2 ops, they already
   will be written into a nice single binary op.  If you have 3 ops, a
   single simple check suffices to tell you whether the first two are of the
   same rank.  If so, you know to order it

   mergetmp = op1 + op2
   newstmt = mergetmp + op3

   instead of
   mergetmp = op2 + op3
   newstmt = mergetmp + op1

   If all three are of the same rank, you can't expose them all in a
   single binary operator anyway, so the above is *still* the best you
   can do.

   Thus, this is what we do.  When we have three ops left, we check to see
   what order to put them in, and call it a day.  As a nod to vector sum
   reduction, we check if any of the ops are really a phi node that is a
   destructive update for the associating op, and keep the destructive
   update together for vector sum reduction recognition.  */
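
/* Illustrative sketch of steps 1, 2 and 4 (an addition to the original
   comment, not part of it): given

     t1 = a - b;
     t2 = t1 + c;
     t3 = t2 + a;

   step 1 rewrites the subtraction as t1 = a + (-b), step 2 linearizes the
   whole tree into the operand vector {a, -b, c, a}, and step 4 re-emits
   the chain with the operands sorted by rank, so that equal-rank (and
   equal-value) operands sit next to each other where the optimizations in
   step 3 and later redundancy elimination can see them.  */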
/* Enable insertion of __builtin_powi calls during execute_reassoc.  See
   point 3a in the pass header comment.  */
static bool reassoc_insert_powi_p;

/* Enable biasing ranks of loop accumulators.  We don't want this before
   vectorization, since it interferes with reduction chains.  */
static bool reassoc_bias_loop_carried_phi_ranks_p;

/* Statistics */
static struct
{
  int linearized;
  int constants_eliminated;
  int ops_eliminated;
  int rewritten;
  int pows_encountered;
  int pows_created;
} reassociate_stats;


static object_allocator<operand_entry> operand_entry_pool
  ("operand entry pool");

/* This is used to assign a unique ID to each struct operand_entry
   so that qsort results are identical on different hosts.  */
static unsigned int next_operand_entry_id;

/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  */
static int64_t *bb_rank;

/* Operand->rank hashtable.  */
static hash_map<tree, int64_t> *operand_rank;

/* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
   biased.  */
static auto_bitmap biased_names;

/* Vector of SSA_NAMEs on which after reassociate_bb is done with
   all basic blocks the CFG should be adjusted - basic blocks
   split right after that SSA_NAME's definition statement and before
   the only use, which must be a bit ior.  */
static vec<tree> reassoc_branch_fixups;

/* Forward decls.  */
static int64_t get_rank (tree);
static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
/* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
   possibly added by gsi_remove.  */

static bool
reassoc_remove_stmt (gimple_stmt_iterator *gsi)
{
  gimple *stmt = gsi_stmt (*gsi);

  if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
    return gsi_remove (gsi, true);

  gimple_stmt_iterator prev = *gsi;
  gsi_prev (&prev);
  unsigned uid = gimple_uid (stmt);
  basic_block bb = gimple_bb (stmt);
  bool ret = gsi_remove (gsi, true);
  if (!gsi_end_p (prev))
    gsi_next (&prev);
  else
    prev = gsi_start_bb (bb);
  gimple *end_stmt = gsi_stmt (*gsi);
  while ((stmt = gsi_stmt (prev)) != end_stmt)
    {
      gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
      gimple_set_uid (stmt, uid);
      gsi_next (&prev);
    }
  return ret;
}
/* Bias amount for loop-carried phis.  We want this to be larger than
   the depth of any reassociation tree we can see, but not larger than
   the rank difference between two blocks.  */
#define PHI_LOOP_BIAS (1 << 15)
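
/* Illustrative example (an addition, not from the original sources): for an
   accumulator phi in an innermost loop such as

     sum_1 = PHI <sum_0 (preheader), sum_2 (latch)>

   phi_rank below returns bb_rank[latch->index] + PHI_LOOP_BIAS, which is
   higher than the rank of anything computed inside the loop body, so the
   rewrite step adds sum_1 last in the reassociated chain.  */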
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
279 propagate_bias_p (gimple
*stmt
)
282 imm_use_iterator use_iter
;
283 gimple
*single_use_stmt
= NULL
;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_reference
)
288 FOR_EACH_IMM_USE_FAST (use
, use_iter
, gimple_assign_lhs (stmt
))
290 gimple
*current_use_stmt
= USE_STMT (use
);
292 if (is_gimple_assign (current_use_stmt
)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt
)) == SSA_NAME
)
295 if (single_use_stmt
!= NULL
&& single_use_stmt
!= current_use_stmt
)
297 single_use_stmt
= current_use_stmt
;
301 if (single_use_stmt
== NULL
)
304 if (gimple_bb (stmt
)->loop_father
305 != gimple_bb (single_use_stmt
)->loop_father
)
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
319 phi_rank (gimple
*stmt
)
321 basic_block bb
= gimple_bb (stmt
);
322 class loop
*father
= bb
->loop_father
;
328 if (!reassoc_bias_loop_carried_phi_ranks_p
)
329 return bb_rank
[bb
->index
];
331 /* We only care about real loops (those with a latch). */
333 return bb_rank
[bb
->index
];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb
!= father
->header
338 return bb_rank
[bb
->index
];
340 /* Ignore virtual SSA_NAMEs. */
341 res
= gimple_phi_result (stmt
);
342 if (virtual_operand_p (res
))
343 return bb_rank
[bb
->index
];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res
, &use
, &use_stmt
)
348 || gimple_bb (use_stmt
)->loop_father
!= father
)
349 return bb_rank
[bb
->index
];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i
= 0; i
< gimple_phi_num_args (stmt
); i
++)
354 tree arg
= gimple_phi_arg_def (stmt
, i
);
355 if (TREE_CODE (arg
) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg
))
358 gimple
*def_stmt
= SSA_NAME_DEF_STMT (arg
);
359 if (gimple_bb (def_stmt
)->loop_father
== father
)
360 return bb_rank
[father
->latch
->index
] + PHI_LOOP_BIAS
;
364 /* Must be an uninteresting phi. */
365 return bb_rank
[bb
->index
];
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
373 propagate_rank (int64_t rank
, tree op
, bool *maybe_biased_p
)
377 op_rank
= get_rank (op
);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op
) == SSA_NAME
382 && bitmap_bit_p (biased_names
, SSA_NAME_VERSION (op
)))
384 if (maybe_biased_p
== NULL
)
386 *maybe_biased_p
= true;
389 return MAX (rank
, op_rank
);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e
)
397 int64_t *slot
= operand_rank
->get (e
);
398 return slot
? *slot
: -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
404 insert_operand_rank (tree e
, int64_t rank
)
406 gcc_assert (rank
> 0);
407 bool existed
= operand_rank
->put (e
, rank
);
408 gcc_assert (!existed
);
/* Given an expression E, return the rank of the expression.  */

static int64_t
get_rank (tree e)
{
  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  /* We make an exception to the normal ranking system to break
     dependences of accumulator variables in loops.  Suppose we
     have a simple one-block loop containing:

       x_1  =  phi(x_0, x_2)
       b  =  a + x_1
       c  =  b + d
       x_2  =  c + e

     As shown, each iteration of the calculation into x is fully
     dependent upon the iteration before it.  We would prefer to
     see this in the form:

       x_1  =  phi(x_0, x_2)
       b  =  a + d
       c  =  b + e
       x_2  =  c + x_1

     If the loop is unrolled, the calculations of b and c from
     different iterations can be interleaved.

     To obtain this result during reassociation, we bias the rank
     of the phi definition x_1 upward, when it is recognized as an
     accumulator pattern.  The artificial rank causes it to be
     added last, providing the desired independence.  */
453 if (TREE_CODE (e
) == SSA_NAME
)
460 /* If we already have a rank for this expression, use that. */
461 rank
= find_operand_rank (e
);
465 stmt
= SSA_NAME_DEF_STMT (e
);
466 if (gimple_code (stmt
) == GIMPLE_PHI
)
468 rank
= phi_rank (stmt
);
469 if (rank
!= bb_rank
[gimple_bb (stmt
)->index
])
470 bitmap_set_bit (biased_names
, SSA_NAME_VERSION (e
));
473 else if (!is_gimple_assign (stmt
))
474 rank
= bb_rank
[gimple_bb (stmt
)->index
];
478 bool biased_p
= false;
479 bool *maybe_biased_p
= propagate_bias_p (stmt
) ? &biased_p
: NULL
;
481 /* Otherwise, find the maximum rank for the operands. As an
482 exception, remove the bias from loop-carried phis when propagating
483 the rank so that dependent operations are not also biased. */
      /* Simply walk over all SSA uses - this takes advantage of the
         fact that non-SSA operands are is_gimple_min_invariant and
         thus have rank 0.  */
488 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
489 rank
= propagate_rank (rank
, op
, maybe_biased_p
);
493 bitmap_set_bit (biased_names
, SSA_NAME_VERSION (e
));
496 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
498 fprintf (dump_file
, "Rank for ");
499 print_generic_expr (dump_file
, e
);
500 fprintf (dump_file
, " is %" PRId64
"\n", rank
);
503 /* Note the rank in the hashtable so we don't recompute it. */
504 insert_operand_rank (e
, rank
);
508 /* Constants, globals, etc., are rank 0 */
/* We want integer ones to end up last no matter what, since they are
   the ones we can do the most with.  */
#define INTEGER_CONST_TYPE 1 << 4
#define FLOAT_ONE_CONST_TYPE 1 << 3
#define FLOAT_CONST_TYPE 1 << 2
#define OTHER_CONST_TYPE 1 << 1
520 /* Classify an invariant tree into integer, float, or other, so that
521 we can sort them to be near other constants of the same type. */
523 constant_type (tree t
)
525 if (INTEGRAL_TYPE_P (TREE_TYPE (t
)))
526 return INTEGER_CONST_TYPE
;
527 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t
)))
529 /* Sort -1.0 and 1.0 constants last, while in some cases
530 const_binop can't optimize some inexact operations, multiplication
531 by -1.0 or 1.0 can be always merged with others. */
532 if (real_onep (t
) || real_minus_onep (t
))
533 return FLOAT_ONE_CONST_TYPE
;
534 return FLOAT_CONST_TYPE
;
537 return OTHER_CONST_TYPE
;
540 /* qsort comparison function to sort operand entries PA and PB by rank
541 so that the sorted array is ordered by rank in decreasing order. */
543 sort_by_operand_rank (const void *pa
, const void *pb
)
545 const operand_entry
*oea
= *(const operand_entry
*const *)pa
;
546 const operand_entry
*oeb
= *(const operand_entry
*const *)pb
;
548 if (oeb
->rank
!= oea
->rank
)
549 return oeb
->rank
> oea
->rank
? 1 : -1;
551 /* It's nicer for optimize_expression if constants that are likely
552 to fold when added/multiplied/whatever are put next to each
553 other. Since all constants have rank 0, order them by type. */
556 if (constant_type (oeb
->op
) != constant_type (oea
->op
))
557 return constant_type (oea
->op
) - constant_type (oeb
->op
);
559 /* To make sorting result stable, we use unique IDs to determine
561 return oeb
->id
> oea
->id
? 1 : -1;
564 if (TREE_CODE (oea
->op
) != SSA_NAME
)
566 if (TREE_CODE (oeb
->op
) != SSA_NAME
)
567 return oeb
->id
> oea
->id
? 1 : -1;
571 else if (TREE_CODE (oeb
->op
) != SSA_NAME
)
574 /* Lastly, make sure the versions that are the same go next to each
576 if (SSA_NAME_VERSION (oeb
->op
) != SSA_NAME_VERSION (oea
->op
))
      /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
	 versions of removed SSA_NAMEs, so if possible prefer to sort
	 based on the basic block and gimple_uid of the SSA_NAME_DEF_STMT.  */
582 gimple
*stmta
= SSA_NAME_DEF_STMT (oea
->op
);
583 gimple
*stmtb
= SSA_NAME_DEF_STMT (oeb
->op
);
584 basic_block bba
= gimple_bb (stmta
);
585 basic_block bbb
= gimple_bb (stmtb
);
588 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
589 but the other might not. */
594 /* If neither is, compare bb_rank. */
595 if (bb_rank
[bbb
->index
] != bb_rank
[bba
->index
])
596 return (bb_rank
[bbb
->index
] >> 16) - (bb_rank
[bba
->index
] >> 16);
599 bool da
= reassoc_stmt_dominates_stmt_p (stmta
, stmtb
);
600 bool db
= reassoc_stmt_dominates_stmt_p (stmtb
, stmta
);
604 return SSA_NAME_VERSION (oeb
->op
) > SSA_NAME_VERSION (oea
->op
) ? 1 : -1;
607 return oeb
->id
> oea
->id
? 1 : -1;
610 /* Add an operand entry to *OPS for the tree operand OP. */
613 add_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
, gimple
*stmt_to_insert
= NULL
)
615 operand_entry
*oe
= operand_entry_pool
.allocate ();
618 oe
->rank
= get_rank (op
);
619 oe
->id
= next_operand_entry_id
++;
621 oe
->stmt_to_insert
= stmt_to_insert
;
625 /* Add an operand entry to *OPS for the tree operand OP with repeat
629 add_repeat_to_ops_vec (vec
<operand_entry
*> *ops
, tree op
,
630 HOST_WIDE_INT repeat
)
632 operand_entry
*oe
= operand_entry_pool
.allocate ();
635 oe
->rank
= get_rank (op
);
636 oe
->id
= next_operand_entry_id
++;
638 oe
->stmt_to_insert
= NULL
;
641 reassociate_stats
.pows_encountered
++;
644 /* Returns true if we can associate the SSA def OP. */
647 can_reassociate_op_p (tree op
)
649 if (TREE_CODE (op
) == SSA_NAME
&& SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op
))
651 /* Uninitialized variables can't participate in reassociation. */
652 if (TREE_CODE (op
) == SSA_NAME
&& ssa_name_maybe_undef_p (op
))
654 /* Make sure asm goto outputs do not participate in reassociation since
655 we have no way to find an insertion place after asm goto. */
656 if (TREE_CODE (op
) == SSA_NAME
657 && gimple_code (SSA_NAME_DEF_STMT (op
)) == GIMPLE_ASM
658 && gimple_asm_nlabels (as_a
<gasm
*> (SSA_NAME_DEF_STMT (op
))) != 0)
663 /* Returns true if we can reassociate operations of TYPE.
664 That is for integral or non-saturating fixed-point types, and for
665 floating point type when associative-math is enabled. */
668 can_reassociate_type_p (tree type
)
670 if ((ANY_INTEGRAL_TYPE_P (type
) && TYPE_OVERFLOW_WRAPS (type
))
671 || NON_SAT_FIXED_POINT_TYPE_P (type
)
672 || (flag_associative_math
&& FLOAT_TYPE_P (type
)))
677 /* Return true if STMT is reassociable operation containing a binary
678 operation with tree code CODE, and is inside LOOP. */
681 is_reassociable_op (gimple
*stmt
, enum tree_code code
, class loop
*loop
)
683 basic_block bb
= gimple_bb (stmt
);
685 if (gimple_bb (stmt
) == NULL
)
688 if (!flow_bb_inside_loop_p (loop
, bb
))
691 if (is_gimple_assign (stmt
)
692 && gimple_assign_rhs_code (stmt
) == code
693 && has_single_use (gimple_assign_lhs (stmt
)))
695 tree rhs1
= gimple_assign_rhs1 (stmt
);
696 tree rhs2
= gimple_assign_rhs2 (stmt
);
697 if (!can_reassociate_op_p (rhs1
)
698 || (rhs2
&& !can_reassociate_op_p (rhs2
)))
707 /* Return true if STMT is a nop-conversion. */
710 gimple_nop_conversion_p (gimple
*stmt
)
712 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
))
714 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass
))
715 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass
)),
716 TREE_TYPE (gimple_assign_rhs1 (ass
))))
722 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
723 operand of the negate operation. Otherwise, return NULL. */
726 get_unary_op (tree name
, enum tree_code opcode
)
728 gimple
*stmt
= SSA_NAME_DEF_STMT (name
);
730 /* Look through nop conversions (sign changes). */
731 if (gimple_nop_conversion_p (stmt
)
732 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
733 stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
735 if (!is_gimple_assign (stmt
))
738 if (gimple_assign_rhs_code (stmt
) == opcode
)
739 return gimple_assign_rhs1 (stmt
);
743 /* Return true if OP1 and OP2 have the same value if casted to either type. */
746 ops_equal_values_p (tree op1
, tree op2
)
752 if (TREE_CODE (op1
) == SSA_NAME
)
754 gimple
*stmt
= SSA_NAME_DEF_STMT (op1
);
755 if (gimple_nop_conversion_p (stmt
))
757 op1
= gimple_assign_rhs1 (stmt
);
763 if (TREE_CODE (op2
) == SSA_NAME
)
765 gimple
*stmt
= SSA_NAME_DEF_STMT (op2
);
766 if (gimple_nop_conversion_p (stmt
))
768 op2
= gimple_assign_rhs1 (stmt
);
779 /* If CURR and LAST are a pair of ops that OPCODE allows us to
780 eliminate through equivalences, do so, remove them from OPS, and
781 return true. Otherwise, return false. */
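
/* Illustrative examples (an addition, not from the original comment):
     a & a       ->  a   (one copy removed)
     max (a, a)  ->  a
     a ^ a       ->  0   (both copies removed; a zero constant is added
			  if nothing else is left in the operand list)  */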
784 eliminate_duplicate_pair (enum tree_code opcode
,
785 vec
<operand_entry
*> *ops
,
792 /* If we have two of the same op, and the opcode is & |, min, or max,
793 we can eliminate one of them.
794 If we have two of the same op, and the opcode is ^, we can
795 eliminate both of them. */
797 if (last
&& last
->op
== curr
->op
)
805 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
807 fprintf (dump_file
, "Equivalence: ");
808 print_generic_expr (dump_file
, curr
->op
);
809 fprintf (dump_file
, " [&|minmax] ");
810 print_generic_expr (dump_file
, last
->op
);
811 fprintf (dump_file
, " -> ");
812 print_generic_stmt (dump_file
, last
->op
);
815 ops
->ordered_remove (i
);
816 reassociate_stats
.ops_eliminated
++;
821 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
823 fprintf (dump_file
, "Equivalence: ");
824 print_generic_expr (dump_file
, curr
->op
);
825 fprintf (dump_file
, " ^ ");
826 print_generic_expr (dump_file
, last
->op
);
827 fprintf (dump_file
, " -> nothing\n");
830 reassociate_stats
.ops_eliminated
+= 2;
832 if (ops
->length () == 2)
835 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (last
->op
)));
840 ops
->ordered_remove (i
-1);
841 ops
->ordered_remove (i
-1);
853 static vec
<tree
> plus_negates
;
855 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
856 expression, look in OPS for a corresponding positive operation to cancel
857 it out. If we find one, remove the other from OPS, replace
858 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
862 eliminate_plus_minus_pair (enum tree_code opcode
,
863 vec
<operand_entry
*> *ops
,
864 unsigned int currindex
,
872 if (opcode
!= PLUS_EXPR
|| TREE_CODE (curr
->op
) != SSA_NAME
)
875 negateop
= get_unary_op (curr
->op
, NEGATE_EXPR
);
876 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
877 if (negateop
== NULL_TREE
&& notop
== NULL_TREE
)
880 /* Any non-negated version will have a rank that is one less than
881 the current rank. So once we hit those ranks, if we don't find
884 for (i
= currindex
+ 1;
885 ops
->iterate (i
, &oe
)
886 && oe
->rank
>= curr
->rank
- 1 ;
890 && ops_equal_values_p (oe
->op
, negateop
))
892 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
894 fprintf (dump_file
, "Equivalence: ");
895 print_generic_expr (dump_file
, negateop
);
896 fprintf (dump_file
, " + -");
897 print_generic_expr (dump_file
, oe
->op
);
898 fprintf (dump_file
, " -> 0\n");
901 ops
->ordered_remove (i
);
902 add_to_ops_vec (ops
, build_zero_cst (TREE_TYPE (oe
->op
)));
903 ops
->ordered_remove (currindex
);
904 reassociate_stats
.ops_eliminated
++;
909 && ops_equal_values_p (oe
->op
, notop
))
911 tree op_type
= TREE_TYPE (oe
->op
);
913 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
915 fprintf (dump_file
, "Equivalence: ");
916 print_generic_expr (dump_file
, notop
);
917 fprintf (dump_file
, " + ~");
918 print_generic_expr (dump_file
, oe
->op
);
919 fprintf (dump_file
, " -> -1\n");
922 ops
->ordered_remove (i
);
923 add_to_ops_vec (ops
, build_all_ones_cst (op_type
));
924 ops
->ordered_remove (currindex
);
925 reassociate_stats
.ops_eliminated
++;
931 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
932 save it for later inspection in repropagate_negates(). */
933 if (negateop
!= NULL_TREE
934 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr
->op
)) == NEGATE_EXPR
)
935 plus_negates
.safe_push (curr
->op
);
940 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
941 bitwise not expression, look in OPS for a corresponding operand to
942 cancel it out. If we find one, remove the other from OPS, replace
943 OPS[CURRINDEX] with 0, and return true. Otherwise, return
947 eliminate_not_pairs (enum tree_code opcode
,
948 vec
<operand_entry
*> *ops
,
949 unsigned int currindex
,
956 if ((opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
957 || TREE_CODE (curr
->op
) != SSA_NAME
)
960 notop
= get_unary_op (curr
->op
, BIT_NOT_EXPR
);
961 if (notop
== NULL_TREE
)
964 /* Any non-not version will have a rank that is one less than
965 the current rank. So once we hit those ranks, if we don't find
968 for (i
= currindex
+ 1;
969 ops
->iterate (i
, &oe
)
970 && oe
->rank
>= curr
->rank
- 1;
975 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
977 fprintf (dump_file
, "Equivalence: ");
978 print_generic_expr (dump_file
, notop
);
979 if (opcode
== BIT_AND_EXPR
)
980 fprintf (dump_file
, " & ~");
981 else if (opcode
== BIT_IOR_EXPR
)
982 fprintf (dump_file
, " | ~");
983 print_generic_expr (dump_file
, oe
->op
);
984 if (opcode
== BIT_AND_EXPR
)
985 fprintf (dump_file
, " -> 0\n");
986 else if (opcode
== BIT_IOR_EXPR
)
987 fprintf (dump_file
, " -> -1\n");
990 if (opcode
== BIT_AND_EXPR
)
991 oe
->op
= build_zero_cst (TREE_TYPE (oe
->op
));
992 else if (opcode
== BIT_IOR_EXPR
)
993 oe
->op
= build_all_ones_cst (TREE_TYPE (oe
->op
));
995 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
997 ops
->quick_push (oe
);
1005 /* Use constant value that may be present in OPS to try to eliminate
1006 operands. Note that this function is only really used when we've
1007 eliminated ops for other reasons, or merged constants. Across
1008 single statements, fold already does all of this, plus more. There
1009 is little point in duplicating logic, so I've only included the
1010 identities that I could ever construct testcases to trigger. */
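
/* Illustrative examples of the identities handled below (an addition,
   not from the original comment):
     a & 0 & b   ->  0           a & -1  ->  a
     a | -1 | b  -> -1           a | 0   ->  a
     a * 0 * b   ->  0           a * 1.0 ->  a   (no signaling NaNs)
     a + 0       ->  a   (for floats only when the zero addition folds)  */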
1013 eliminate_using_constants (enum tree_code opcode
,
1014 vec
<operand_entry
*> *ops
)
1016 operand_entry
*oelast
= ops
->last ();
1017 tree type
= TREE_TYPE (oelast
->op
);
1019 if (oelast
->rank
== 0
1020 && (ANY_INTEGRAL_TYPE_P (type
) || FLOAT_TYPE_P (type
)))
1025 if (integer_zerop (oelast
->op
))
1027 if (ops
->length () != 1)
1029 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1030 fprintf (dump_file
, "Found & 0, removing all other ops\n");
1032 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1035 ops
->quick_push (oelast
);
1039 else if (integer_all_onesp (oelast
->op
))
1041 if (ops
->length () != 1)
1043 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1044 fprintf (dump_file
, "Found & -1, removing\n");
1046 reassociate_stats
.ops_eliminated
++;
1051 if (integer_all_onesp (oelast
->op
))
1053 if (ops
->length () != 1)
1055 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1056 fprintf (dump_file
, "Found | -1, removing all other ops\n");
1058 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1061 ops
->quick_push (oelast
);
1065 else if (integer_zerop (oelast
->op
))
1067 if (ops
->length () != 1)
1069 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1070 fprintf (dump_file
, "Found | 0, removing\n");
1072 reassociate_stats
.ops_eliminated
++;
1077 if (integer_zerop (oelast
->op
)
1078 || (FLOAT_TYPE_P (type
)
1079 && !HONOR_NANS (type
)
1080 && !HONOR_SIGNED_ZEROS (type
)
1081 && real_zerop (oelast
->op
)))
1083 if (ops
->length () != 1)
1085 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1086 fprintf (dump_file
, "Found * 0, removing all other ops\n");
1088 reassociate_stats
.ops_eliminated
+= ops
->length () - 1;
1090 ops
->quick_push (oelast
);
1094 else if (integer_onep (oelast
->op
)
1095 || (FLOAT_TYPE_P (type
)
1096 && !HONOR_SNANS (type
)
1097 && real_onep (oelast
->op
)))
1099 if (ops
->length () != 1)
1101 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1102 fprintf (dump_file
, "Found * 1, removing\n");
1104 reassociate_stats
.ops_eliminated
++;
1112 if (integer_zerop (oelast
->op
)
1113 || (FLOAT_TYPE_P (type
)
1114 && (opcode
== PLUS_EXPR
|| opcode
== MINUS_EXPR
)
1115 && fold_real_zero_addition_p (type
, 0, oelast
->op
,
1116 opcode
== MINUS_EXPR
)))
1118 if (ops
->length () != 1)
1120 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1121 fprintf (dump_file
, "Found [|^+] 0, removing\n");
1123 reassociate_stats
.ops_eliminated
++;
1135 static void linearize_expr_tree (vec
<operand_entry
*> *, gimple
*,
1138 /* Structure for tracking and counting operands. */
1142 enum tree_code oecode
;
1147 /* The heap for the oecount hashtable and the sorted list of operands. */
1148 static vec
<oecount
> cvec
;
1151 /* Oecount hashtable helpers. */
1153 struct oecount_hasher
: int_hash
<int, 0, 1>
1155 static inline hashval_t
hash (int);
1156 static inline bool equal (int, int);
1159 /* Hash function for oecount. */
1162 oecount_hasher::hash (int p
)
1164 const oecount
*c
= &cvec
[p
- 42];
1165 return htab_hash_pointer (c
->op
) ^ (hashval_t
)c
->oecode
;
1168 /* Comparison function for oecount. */
1171 oecount_hasher::equal (int p1
, int p2
)
1173 const oecount
*c1
= &cvec
[p1
- 42];
1174 const oecount
*c2
= &cvec
[p2
- 42];
1175 return c1
->oecode
== c2
->oecode
&& c1
->op
== c2
->op
;
1178 /* Comparison function for qsort sorting oecount elements by count. */
1181 oecount_cmp (const void *p1
, const void *p2
)
1183 const oecount
*c1
= (const oecount
*)p1
;
1184 const oecount
*c2
= (const oecount
*)p2
;
1185 if (c1
->cnt
!= c2
->cnt
)
1186 return c1
->cnt
> c2
->cnt
? 1 : -1;
1188 /* If counts are identical, use unique IDs to stabilize qsort. */
1189 return c1
->id
> c2
->id
? 1 : -1;
1192 /* Return TRUE iff STMT represents a builtin call that raises OP
1193 to some exponent. */
1196 stmt_is_power_of_op (gimple
*stmt
, tree op
)
1198 if (!is_gimple_call (stmt
))
1201 switch (gimple_call_combined_fn (stmt
))
1205 return (operand_equal_p (gimple_call_arg (stmt
, 0), op
, 0));
1212 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1213 in place and return the result. Assumes that stmt_is_power_of_op
1214 was previously called for STMT and returned TRUE. */
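
/* Illustrative example (an addition, not from the original comment): for
   t = __builtin_powi (x, 5), decrement_power rewrites the call in place to
   __builtin_powi (x, 4) and returns 4, the decremented exponent; a return
   value of 1 tells the caller the call can be replaced by x itself.  */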
1216 static HOST_WIDE_INT
1217 decrement_power (gimple
*stmt
)
1219 REAL_VALUE_TYPE c
, cint
;
1220 HOST_WIDE_INT power
;
1223 switch (gimple_call_combined_fn (stmt
))
1226 arg1
= gimple_call_arg (stmt
, 1);
1227 c
= TREE_REAL_CST (arg1
);
1228 power
= real_to_integer (&c
) - 1;
1229 real_from_integer (&cint
, VOIDmode
, power
, SIGNED
);
1230 gimple_call_set_arg (stmt
, 1, build_real (TREE_TYPE (arg1
), cint
));
1234 arg1
= gimple_call_arg (stmt
, 1);
1235 power
= TREE_INT_CST_LOW (arg1
) - 1;
1236 gimple_call_set_arg (stmt
, 1, build_int_cst (TREE_TYPE (arg1
), power
));
1244 /* Replace SSA defined by STMT and replace all its uses with new
1245 SSA. Also return the new SSA. */
1248 make_new_ssa_for_def (gimple
*stmt
, enum tree_code opcode
, tree op
)
1252 imm_use_iterator iter
;
1253 tree new_lhs
, new_debug_lhs
= NULL_TREE
;
1254 tree lhs
= gimple_get_lhs (stmt
);
1256 new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
1257 gimple_set_lhs (stmt
, new_lhs
);
1259 /* Also need to update GIMPLE_DEBUGs. */
1260 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
1262 tree repl
= new_lhs
;
1263 if (is_gimple_debug (use_stmt
))
1265 if (new_debug_lhs
== NULL_TREE
)
1267 new_debug_lhs
= build_debug_expr_decl (TREE_TYPE (lhs
));
1269 = gimple_build_debug_bind (new_debug_lhs
,
1270 build2 (opcode
, TREE_TYPE (lhs
),
1273 gimple_set_uid (def_temp
, gimple_uid (stmt
));
1274 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
1275 gsi_insert_after (&gsi
, def_temp
, GSI_SAME_STMT
);
1277 repl
= new_debug_lhs
;
1279 FOR_EACH_IMM_USE_ON_STMT (use
, iter
)
1280 SET_USE (use
, repl
);
1281 update_stmt (use_stmt
);
1286 /* Replace all SSAs defined in STMTS_TO_FIX and replace its
1287 uses with new SSAs. Also do this for the stmt that defines DEF
1288 if *DEF is not OP. */
1291 make_new_ssa_for_all_defs (tree
*def
, enum tree_code opcode
, tree op
,
1292 vec
<gimple
*> &stmts_to_fix
)
1298 && TREE_CODE (*def
) == SSA_NAME
1299 && (stmt
= SSA_NAME_DEF_STMT (*def
))
1300 && gimple_code (stmt
) != GIMPLE_NOP
)
1301 *def
= make_new_ssa_for_def (stmt
, opcode
, op
);
1303 FOR_EACH_VEC_ELT (stmts_to_fix
, i
, stmt
)
1304 make_new_ssa_for_def (stmt
, opcode
, op
);
1307 /* Find the single immediate use of STMT's LHS, and replace it
1308 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1309 replace *DEF with OP as well. */
1312 propagate_op_to_single_use (tree op
, gimple
*stmt
, tree
*def
)
1317 gimple_stmt_iterator gsi
;
1319 if (is_gimple_call (stmt
))
1320 lhs
= gimple_call_lhs (stmt
);
1322 lhs
= gimple_assign_lhs (stmt
);
1324 gcc_assert (has_single_use (lhs
));
1325 single_imm_use (lhs
, &use
, &use_stmt
);
1329 if (TREE_CODE (op
) != SSA_NAME
)
1330 update_stmt (use_stmt
);
1331 gsi
= gsi_for_stmt (stmt
);
1332 unlink_stmt_vdef (stmt
);
1333 reassoc_remove_stmt (&gsi
);
1334 release_defs (stmt
);
1337 /* Walks the linear chain with result *DEF searching for an operation
1338 with operand OP and code OPCODE removing that from the chain. *DEF
1339 is updated if there is only one operand but no operation left. */
1342 zero_one_operation (tree
*def
, enum tree_code opcode
, tree op
)
1344 tree orig_def
= *def
;
1345 gimple
*stmt
= SSA_NAME_DEF_STMT (*def
);
  /* PR72835 - Record the stmt chain that has to be updated such that
     we don't use the same LHS when the values computed are different.  */
1348 auto_vec
<gimple
*, 64> stmts_to_fix
;
1354 if (opcode
== MULT_EXPR
)
1356 if (stmt_is_power_of_op (stmt
, op
))
1358 if (decrement_power (stmt
) == 1)
1360 if (stmts_to_fix
.length () > 0)
1361 stmts_to_fix
.pop ();
1362 propagate_op_to_single_use (op
, stmt
, def
);
1366 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
)
1368 if (gimple_assign_rhs1 (stmt
) == op
)
1370 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1371 if (stmts_to_fix
.length () > 0)
1372 stmts_to_fix
.pop ();
1373 propagate_op_to_single_use (cst
, stmt
, def
);
1376 else if (integer_minus_onep (op
)
1377 || real_minus_onep (op
))
1379 gimple_assign_set_rhs_code
1380 (stmt
, TREE_CODE (gimple_assign_rhs1 (stmt
)));
1386 name
= gimple_assign_rhs1 (stmt
);
1388 /* If this is the operation we look for and one of the operands
1389 is ours simply propagate the other operand into the stmts
1391 if (gimple_assign_rhs_code (stmt
) == opcode
1393 || gimple_assign_rhs2 (stmt
) == op
))
1396 name
= gimple_assign_rhs2 (stmt
);
1397 if (stmts_to_fix
.length () > 0)
1398 stmts_to_fix
.pop ();
1399 propagate_op_to_single_use (name
, stmt
, def
);
1403 /* We might have a multiply of two __builtin_pow* calls, and
1404 the operand might be hiding in the rightmost one. Likewise
1405 this can happen for a negate. */
1406 if (opcode
== MULT_EXPR
1407 && gimple_assign_rhs_code (stmt
) == opcode
1408 && TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
1409 && has_single_use (gimple_assign_rhs2 (stmt
)))
1411 gimple
*stmt2
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
1412 if (stmt_is_power_of_op (stmt2
, op
))
1414 if (decrement_power (stmt2
) == 1)
1415 propagate_op_to_single_use (op
, stmt2
, def
);
1417 stmts_to_fix
.safe_push (stmt2
);
1420 else if (is_gimple_assign (stmt2
)
1421 && gimple_assign_rhs_code (stmt2
) == NEGATE_EXPR
)
1423 if (gimple_assign_rhs1 (stmt2
) == op
)
1425 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
1426 propagate_op_to_single_use (cst
, stmt2
, def
);
1429 else if (integer_minus_onep (op
)
1430 || real_minus_onep (op
))
1432 stmts_to_fix
.safe_push (stmt2
);
1433 gimple_assign_set_rhs_code
1434 (stmt2
, TREE_CODE (gimple_assign_rhs1 (stmt2
)));
1440 /* Continue walking the chain. */
1441 gcc_assert (name
!= op
1442 && TREE_CODE (name
) == SSA_NAME
);
1443 stmt
= SSA_NAME_DEF_STMT (name
);
1444 stmts_to_fix
.safe_push (stmt
);
1448 if (stmts_to_fix
.length () > 0 || *def
== orig_def
)
1449 make_new_ssa_for_all_defs (def
, opcode
, op
, stmts_to_fix
);
1452 /* Returns true if statement S1 dominates statement S2. Like
1453 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1456 reassoc_stmt_dominates_stmt_p (gimple
*s1
, gimple
*s2
)
1458 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1460 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1461 SSA_NAME. Assume it lives at the beginning of function and
1462 thus dominates everything. */
1463 if (!bb1
|| s1
== s2
)
1466 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1472 /* PHIs in the same basic block are assumed to be
1473 executed all in parallel, if only one stmt is a PHI,
1474 it dominates the other stmt in the same basic block. */
1475 if (gimple_code (s1
) == GIMPLE_PHI
)
1478 if (gimple_code (s2
) == GIMPLE_PHI
)
1481 gcc_assert (gimple_uid (s1
) && gimple_uid (s2
));
1483 if (gimple_uid (s1
) < gimple_uid (s2
))
1486 if (gimple_uid (s1
) > gimple_uid (s2
))
1489 gimple_stmt_iterator gsi
= gsi_for_stmt (s1
);
1490 unsigned int uid
= gimple_uid (s1
);
1491 for (gsi_next (&gsi
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1493 gimple
*s
= gsi_stmt (gsi
);
1494 if (gimple_uid (s
) != uid
)
1503 return dominated_by_p (CDI_DOMINATORS
, bb2
, bb1
);
1506 /* Insert STMT after INSERT_POINT. */
1509 insert_stmt_after (gimple
*stmt
, gimple
*insert_point
)
1511 gimple_stmt_iterator gsi
;
1514 if (gimple_code (insert_point
) == GIMPLE_PHI
)
1515 bb
= gimple_bb (insert_point
);
1516 else if (!stmt_ends_bb_p (insert_point
))
1518 gsi
= gsi_for_stmt (insert_point
);
1519 gimple_set_uid (stmt
, gimple_uid (insert_point
));
1520 gsi_insert_after (&gsi
, stmt
, GSI_NEW_STMT
);
1523 else if (gimple_code (insert_point
) == GIMPLE_ASM
1524 && gimple_asm_nlabels (as_a
<gasm
*> (insert_point
)) != 0)
1525 /* We have no idea where to insert - it depends on where the
1526 uses will be placed. */
1529 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1530 thus if it must end a basic block, it should be a call that can
1531 throw, or some assignment that can throw. If it throws, the LHS
1532 of it will not be initialized though, so only valid places using
1533 the SSA_NAME should be dominated by the fallthru edge. */
1534 bb
= find_fallthru_edge (gimple_bb (insert_point
)->succs
)->dest
;
1535 gsi
= gsi_after_labels (bb
);
1536 if (gsi_end_p (gsi
))
1538 gimple_stmt_iterator gsi2
= gsi_last_bb (bb
);
1539 gimple_set_uid (stmt
,
1540 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1543 gimple_set_uid (stmt
, gimple_uid (gsi_stmt (gsi
)));
1544 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
1547 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1548 the result. Places the statement after the definition of either
1549 OP1 or OP2. Returns the new statement. */
1552 build_and_add_sum (tree type
, tree op1
, tree op2
, enum tree_code opcode
)
1554 gimple
*op1def
= NULL
, *op2def
= NULL
;
1555 gimple_stmt_iterator gsi
;
1559 /* Create the addition statement. */
1560 op
= make_ssa_name (type
);
1561 sum
= gimple_build_assign (op
, opcode
, op1
, op2
);
1563 /* Find an insertion place and insert. */
1564 if (TREE_CODE (op1
) == SSA_NAME
)
1565 op1def
= SSA_NAME_DEF_STMT (op1
);
1566 if (TREE_CODE (op2
) == SSA_NAME
)
1567 op2def
= SSA_NAME_DEF_STMT (op2
);
1568 if ((!op1def
|| gimple_nop_p (op1def
))
1569 && (!op2def
|| gimple_nop_p (op2def
)))
1571 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1572 if (!gsi_end_p (gsi
)
1573 && is_gimple_call (gsi_stmt (gsi
))
1574 && (gimple_call_flags (gsi_stmt (gsi
)) & ECF_RETURNS_TWICE
))
1576 /* Don't add statements before a returns_twice call at the start
1578 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1579 gsi
= gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1581 if (gsi_end_p (gsi
))
1583 gimple_stmt_iterator gsi2
1584 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun
)));
1585 gimple_set_uid (sum
,
1586 gsi_end_p (gsi2
) ? 1 : gimple_uid (gsi_stmt (gsi2
)));
1589 gimple_set_uid (sum
, gimple_uid (gsi_stmt (gsi
)));
1590 gsi_insert_before (&gsi
, sum
, GSI_NEW_STMT
);
1594 gimple
*insert_point
;
1595 if ((!op1def
|| gimple_nop_p (op1def
))
1596 || (op2def
&& !gimple_nop_p (op2def
)
1597 && reassoc_stmt_dominates_stmt_p (op1def
, op2def
)))
1598 insert_point
= op2def
;
1600 insert_point
= op1def
;
1601 insert_stmt_after (sum
, insert_point
);
1608 /* Perform un-distribution of divisions and multiplications.
1609 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1610 to (A + B) / X for real X.
1612 The algorithm is organized as follows.
1614 - First we walk the addition chain *OPS looking for summands that
1615 are defined by a multiplication or a real division. This results
1616 in the candidates bitmap with relevant indices into *OPS.
1618 - Second we build the chains of multiplications or divisions for
1619 these candidates, counting the number of occurrences of (operand, code)
1620 pairs in all of the candidates chains.
1622 - Third we sort the (operand, code) pairs by number of occurrence and
1623 process them starting with the pair with the most uses.
1625 * For each such pair we walk the candidates again to build a
1626 second candidate bitmap noting all multiplication/division chains
1627 that have at least one occurrence of (operand, code).
1629 * We build an alternate addition chain only covering these
1630 candidates with one (operand, code) operation removed from their
1631 multiplication/division chain.
1633 * The first candidate gets replaced by the alternate addition chain
1634 multiplied/divided by the operand.
1636 * All candidate chains get disabled for further processing and
1637 processing of (operand, code) pairs continues.
   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b) * x * y in one invocation of the reassociation pass.  */
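
/* Worked example (an addition, not from the original comment): for the
   chain  a*x + b*x + c  the two multiplications are the candidates, the
   pair (x, MULT_EXPR) is seen twice, so the first candidate is rewritten
   into (a + b) * x, the second candidate's slot in the addition chain is
   replaced by 0, and the resulting  (a + b) * x + 0 + c  is left for the
   rest of the pass to clean up.  */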
1644 undistribute_ops_list (enum tree_code opcode
,
1645 vec
<operand_entry
*> *ops
, class loop
*loop
)
1647 unsigned int length
= ops
->length ();
1650 unsigned nr_candidates
, nr_candidates2
;
1651 sbitmap_iterator sbi0
;
1652 vec
<operand_entry
*> *subops
;
1653 bool changed
= false;
1654 unsigned int next_oecount_id
= 0;
1657 || opcode
!= PLUS_EXPR
)
1660 /* Build a list of candidates to process. */
1661 auto_sbitmap
candidates (length
);
1662 bitmap_clear (candidates
);
1664 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1666 enum tree_code dcode
;
1669 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1671 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1672 if (!is_gimple_assign (oe1def
))
1674 dcode
= gimple_assign_rhs_code (oe1def
);
1675 if ((dcode
!= MULT_EXPR
1676 && dcode
!= RDIV_EXPR
)
1677 || !is_reassociable_op (oe1def
, dcode
, loop
))
1680 bitmap_set_bit (candidates
, i
);
1684 if (nr_candidates
< 2)
1687 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1689 fprintf (dump_file
, "searching for un-distribute opportunities ");
1690 print_generic_expr (dump_file
,
1691 (*ops
)[bitmap_first_set_bit (candidates
)]->op
, TDF_NONE
);
1692 fprintf (dump_file
, " %d\n", nr_candidates
);
1695 /* Build linearized sub-operand lists and the counting table. */
1698 hash_table
<oecount_hasher
> ctable (15);
1700 /* ??? Macro arguments cannot have multi-argument template types in
1701 them. This typedef is needed to workaround that limitation. */
1702 typedef vec
<operand_entry
*> vec_operand_entry_t_heap
;
1703 subops
= XCNEWVEC (vec_operand_entry_t_heap
, ops
->length ());
1704 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1707 enum tree_code oecode
;
1710 oedef
= SSA_NAME_DEF_STMT ((*ops
)[i
]->op
);
1711 oecode
= gimple_assign_rhs_code (oedef
);
1712 linearize_expr_tree (&subops
[i
], oedef
,
1713 associative_tree_code (oecode
), false);
1715 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1722 c
.id
= next_oecount_id
++;
1725 idx
= cvec
.length () + 41;
1726 slot
= ctable
.find_slot (idx
, INSERT
);
1734 cvec
[*slot
- 42].cnt
++;
1739 /* Sort the counting table. */
1740 cvec
.qsort (oecount_cmp
);
1742 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1745 fprintf (dump_file
, "Candidates:\n");
1746 FOR_EACH_VEC_ELT (cvec
, j
, c
)
1748 fprintf (dump_file
, " %u %s: ", c
->cnt
,
1749 c
->oecode
== MULT_EXPR
1750 ? "*" : c
->oecode
== RDIV_EXPR
? "/" : "?");
1751 print_generic_expr (dump_file
, c
->op
);
1752 fprintf (dump_file
, "\n");
1756 /* Process the (operand, code) pairs in order of most occurrence. */
1757 auto_sbitmap
candidates2 (length
);
1758 while (!cvec
.is_empty ())
1760 oecount
*c
= &cvec
.last ();
1764 /* Now collect the operands in the outer chain that contain
1765 the common operand in their inner chain. */
1766 bitmap_clear (candidates2
);
1768 EXECUTE_IF_SET_IN_BITMAP (candidates
, 0, i
, sbi0
)
1771 enum tree_code oecode
;
1773 tree op
= (*ops
)[i
]->op
;
1775 /* If we undistributed in this chain already this may be
1777 if (TREE_CODE (op
) != SSA_NAME
)
1780 oedef
= SSA_NAME_DEF_STMT (op
);
1781 oecode
= gimple_assign_rhs_code (oedef
);
1782 if (oecode
!= c
->oecode
)
1785 FOR_EACH_VEC_ELT (subops
[i
], j
, oe1
)
1787 if (oe1
->op
== c
->op
)
1789 bitmap_set_bit (candidates2
, i
);
1796 if (nr_candidates2
>= 2)
1798 operand_entry
*oe1
, *oe2
;
1800 int first
= bitmap_first_set_bit (candidates2
);
1802 /* Build the new addition chain. */
1803 oe1
= (*ops
)[first
];
1804 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1806 fprintf (dump_file
, "Building (");
1807 print_generic_expr (dump_file
, oe1
->op
);
1809 zero_one_operation (&oe1
->op
, c
->oecode
, c
->op
);
1810 EXECUTE_IF_SET_IN_BITMAP (candidates2
, first
+1, i
, sbi0
)
1814 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1816 fprintf (dump_file
, " + ");
1817 print_generic_expr (dump_file
, oe2
->op
);
1819 zero_one_operation (&oe2
->op
, c
->oecode
, c
->op
);
1820 sum
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1821 oe1
->op
, oe2
->op
, opcode
);
1822 oe2
->op
= build_zero_cst (TREE_TYPE (oe2
->op
));
1824 oe1
->op
= gimple_get_lhs (sum
);
1827 /* Apply the multiplication/division. */
1828 prod
= build_and_add_sum (TREE_TYPE (oe1
->op
),
1829 oe1
->op
, c
->op
, c
->oecode
);
1830 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1832 fprintf (dump_file
, ") %s ", c
->oecode
== MULT_EXPR
? "*" : "/");
1833 print_generic_expr (dump_file
, c
->op
);
1834 fprintf (dump_file
, "\n");
1837 /* Record it in the addition chain and disable further
1838 undistribution with this op. */
1839 oe1
->op
= gimple_assign_lhs (prod
);
1840 oe1
->rank
= get_rank (oe1
->op
);
1841 subops
[first
].release ();
1849 for (i
= 0; i
< ops
->length (); ++i
)
1850 subops
[i
].release ();
1857 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1858 first: element index for each relevant BIT_FIELD_REF.
1859 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1860 typedef std::pair
<unsigned, unsigned> v_info_elem
;
1863 auto_vec
<v_info_elem
, 32> vec
;
1865 typedef v_info
*v_info_ptr
;
1867 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1869 sort_by_mach_mode (const void *p_i
, const void *p_j
)
1871 const tree tr1
= *((const tree
*) p_i
);
1872 const tree tr2
= *((const tree
*) p_j
);
1873 unsigned int mode1
= TYPE_MODE (TREE_TYPE (tr1
));
1874 unsigned int mode2
= TYPE_MODE (TREE_TYPE (tr2
));
1877 else if (mode1
< mode2
)
1879 if (SSA_NAME_VERSION (tr1
) < SSA_NAME_VERSION (tr2
))
1881 else if (SSA_NAME_VERSION (tr1
) > SSA_NAME_VERSION (tr2
))
1886 /* Cleanup hash map for VECTOR information. */
1888 cleanup_vinfo_map (hash_map
<tree
, v_info_ptr
> &info_map
)
1890 for (hash_map
<tree
, v_info_ptr
>::iterator it
= info_map
.begin ();
1891 it
!= info_map
.end (); ++it
)
1893 v_info_ptr info
= (*it
).second
;
1895 (*it
).second
= NULL
;
1899 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1900 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1902 Vs = (V1 + V2 + ... + Vn)
1903 Vs[0] + Vs[1] + ... + Vs[k]
1905 The basic steps are listed below:
1907 1) Check the addition chain *OPS by looking those summands coming from
1908 VECTOR bit_field_ref on VECTOR type. Put the information into
1909 v_info_map for each satisfied summand, using VECTOR SSA_NAME as key.
1911 2) For each key (VECTOR SSA_NAME), validate all its BIT_FIELD_REFs are
1912 continuous, they can cover the whole VECTOR perfectly without any holes.
1913 Obtain one VECTOR list which contain candidates to be transformed.
1915 3) Sort the VECTOR list by machine mode of VECTOR type, for each group of
1916 candidates with same mode, build the addition statements for them and
1917 generate BIT_FIELD_REFs accordingly.
   The current implementation requires each whole VECTOR to be fully
   covered.  It could be extended to support partial coverage (adjacent
   elements that do not fill the whole vector), but that would need some
   cost model to decide where to draw the boundary.  */
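
/* Illustrative example (an addition, not from the original comment): with
   two V4SF vectors v and w, the chain

     v[0] + v[1] + v[2] + v[3] + w[0] + w[1] + w[2] + w[3]

   where each element is a BIT_FIELD_REF of v or w, is rewritten as

     s = v + w;
     s[0] + s[1] + s[2] + s[3]

   i.e. one vector addition plus four BIT_FIELD_REFs on the sum replace
   the eight scalar summands; the displaced summands become zeros in the
   original addition chain.  */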
1926 undistribute_bitref_for_vector (enum tree_code opcode
,
1927 vec
<operand_entry
*> *ops
, struct loop
*loop
)
1929 if (ops
->length () <= 1)
1932 if (opcode
!= PLUS_EXPR
1933 && opcode
!= MULT_EXPR
1934 && opcode
!= BIT_XOR_EXPR
1935 && opcode
!= BIT_IOR_EXPR
1936 && opcode
!= BIT_AND_EXPR
)
1939 hash_map
<tree
, v_info_ptr
> v_info_map
;
1943 /* Find those summands from VECTOR BIT_FIELD_REF in addition chain, put the
1944 information into map. */
1945 FOR_EACH_VEC_ELT (*ops
, i
, oe1
)
1947 enum tree_code dcode
;
1950 if (TREE_CODE (oe1
->op
) != SSA_NAME
)
1952 oe1def
= SSA_NAME_DEF_STMT (oe1
->op
);
1953 if (!is_gimple_assign (oe1def
))
1955 dcode
= gimple_assign_rhs_code (oe1def
);
1956 if (dcode
!= BIT_FIELD_REF
|| !is_reassociable_op (oe1def
, dcode
, loop
))
1959 tree rhs
= gimple_assign_rhs1 (oe1def
);
1960 tree vec
= TREE_OPERAND (rhs
, 0);
1961 tree vec_type
= TREE_TYPE (vec
);
1963 if (TREE_CODE (vec
) != SSA_NAME
|| !VECTOR_TYPE_P (vec_type
))
1966 /* Ignore it if target machine can't support this VECTOR type. */
1967 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
1970 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1971 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
1974 if (VECTOR_TYPE_P (TREE_TYPE (rhs
))
1975 || !is_a
<scalar_mode
> (TYPE_MODE (TREE_TYPE (rhs
))))
1978 /* The type of BIT_FIELD_REF might not be equal to the element type of
1979 the vector. We want to use a vector type with element type the
1980 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1981 if (!useless_type_conversion_p (TREE_TYPE (rhs
), TREE_TYPE (vec_type
)))
1983 machine_mode simd_mode
;
1984 unsigned HOST_WIDE_INT size
, nunits
;
1985 unsigned HOST_WIDE_INT elem_size
1986 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs
)));
1987 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type
)).is_constant (&size
))
1989 if (size
<= elem_size
|| (size
% elem_size
) != 0)
1991 nunits
= size
/ elem_size
;
1992 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs
)),
1993 nunits
).exists (&simd_mode
))
1995 vec_type
= build_vector_type_for_mode (TREE_TYPE (rhs
), simd_mode
);
1997 /* Ignore it if target machine can't support this VECTOR type. */
1998 if (!VECTOR_MODE_P (TYPE_MODE (vec_type
)))
2001 /* Check const vector type, constrain BIT_FIELD_REF offset and
2003 if (!TYPE_VECTOR_SUBPARTS (vec_type
).is_constant ())
2006 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type
)),
2007 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec
)))))
2011 tree elem_type
= TREE_TYPE (vec_type
);
2012 unsigned HOST_WIDE_INT elem_size
= tree_to_uhwi (TYPE_SIZE (elem_type
));
2013 if (maybe_ne (bit_field_size (rhs
), elem_size
))
2017 if (!constant_multiple_p (bit_field_offset (rhs
), elem_size
, &idx
))
2020 /* Ignore it if target machine can't support this type of VECTOR
2022 optab op_tab
= optab_for_tree_code (opcode
, vec_type
, optab_vector
);
2023 if (optab_handler (op_tab
, TYPE_MODE (vec_type
)) == CODE_FOR_nothing
)
2027 v_info_ptr
&info
= v_info_map
.get_or_insert (vec
, &existed
);
2031 info
->vec_type
= vec_type
;
2033 else if (!types_compatible_p (vec_type
, info
->vec_type
))
2035 info
->vec
.safe_push (std::make_pair (idx
, i
));
2038 /* At least two VECTOR to combine. */
2039 if (v_info_map
.elements () <= 1)
2041 cleanup_vinfo_map (v_info_map
);
2045 /* Verify all VECTOR candidates by checking two conditions:
2046 1) sorted offsets are adjacent, no holes.
2047 2) can fill the whole VECTOR perfectly.
2048 And add the valid candidates to a vector for further handling. */
2049 auto_vec
<tree
> valid_vecs (v_info_map
.elements ());
2050 for (hash_map
<tree
, v_info_ptr
>::iterator it
= v_info_map
.begin ();
2051 it
!= v_info_map
.end (); ++it
)
2053 tree cand_vec
= (*it
).first
;
2054 v_info_ptr cand_info
= (*it
).second
;
2055 unsigned int num_elems
2056 = TYPE_VECTOR_SUBPARTS (cand_info
->vec_type
).to_constant ();
2057 if (cand_info
->vec
.length () != num_elems
)
2059 sbitmap holes
= sbitmap_alloc (num_elems
);
2060 bitmap_ones (holes
);
2063 FOR_EACH_VEC_ELT (cand_info
->vec
, i
, curr
)
2065 if (!bitmap_bit_p (holes
, curr
->first
))
2071 bitmap_clear_bit (holes
, curr
->first
);
2073 if (valid
&& bitmap_empty_p (holes
))
2074 valid_vecs
.quick_push (cand_vec
);
2075 sbitmap_free (holes
);
2078 /* At least two VECTOR to combine. */
2079 if (valid_vecs
.length () <= 1)
2081 cleanup_vinfo_map (v_info_map
);
2085 valid_vecs
.qsort (sort_by_mach_mode
);
2086 /* Go through all candidates by machine mode order, query the mode_to_total
2087 to get the total number for each mode and skip the single one. */
2088 for (unsigned i
= 0; i
< valid_vecs
.length () - 1; ++i
)
2090 tree tvec
= valid_vecs
[i
];
2091 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (tvec
));
2093 /* Skip modes with only a single candidate. */
2094 if (TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) != mode
)
2097 unsigned int idx
, j
;
2099 tree sum_vec
= tvec
;
2100 v_info_ptr info_ptr
= *(v_info_map
.get (tvec
));
2102 tree vec_type
= info_ptr
->vec_type
;
2104 /* Build the sum for all candidates with same mode. */
2107 sum
= build_and_add_sum (vec_type
, sum_vec
,
2108 valid_vecs
[i
+ 1], opcode
);
2109 /* Update the operands only after build_and_add_sum,
2110 so that we don't have to repeat the placement algorithm
2111 of build_and_add_sum. */
2113 && !useless_type_conversion_p (vec_type
, TREE_TYPE (sum_vec
)))
2115 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2116 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
, sum_vec
);
2117 tree lhs
= make_ssa_name (vec_type
);
2118 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2119 gimple_set_uid (g
, gimple_uid (sum
));
2120 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2121 gimple_assign_set_rhs1 (sum
, lhs
);
2124 if (!useless_type_conversion_p (vec_type
,
2125 TREE_TYPE (valid_vecs
[i
+ 1])))
2127 gimple_stmt_iterator gsi
= gsi_for_stmt (sum
);
2128 tree vce
= build1 (VIEW_CONVERT_EXPR
, vec_type
,
2130 tree lhs
= make_ssa_name (vec_type
);
2131 gimple
*g
= gimple_build_assign (lhs
, VIEW_CONVERT_EXPR
, vce
);
2132 gimple_set_uid (g
, gimple_uid (sum
));
2133 gsi_insert_before (&gsi
, g
, GSI_NEW_STMT
);
2134 gimple_assign_set_rhs2 (sum
, lhs
);
2137 sum_vec
= gimple_get_lhs (sum
);
2138 info_ptr
= *(v_info_map
.get (valid_vecs
[i
+ 1]));
2139 gcc_assert (types_compatible_p (vec_type
, info_ptr
->vec_type
));
2140 /* Update those related ops of current candidate VECTOR. */
2141 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2144 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
2145 /* Set this then op definition will get DCEd later. */
2146 gimple_set_visited (def
, true);
2147 if (opcode
== PLUS_EXPR
2148 || opcode
== BIT_XOR_EXPR
2149 || opcode
== BIT_IOR_EXPR
)
2150 (*ops
)[idx
]->op
= build_zero_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2151 else if (opcode
== MULT_EXPR
)
2152 (*ops
)[idx
]->op
= build_one_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2155 gcc_assert (opcode
== BIT_AND_EXPR
);
2157 = build_all_ones_cst (TREE_TYPE ((*ops
)[idx
]->op
));
2159 (*ops
)[idx
]->rank
= 0;
2161 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2163 fprintf (dump_file
, "Generating addition -> ");
2164 print_gimple_stmt (dump_file
, sum
, 0);
2168 while ((i
< valid_vecs
.length () - 1)
2169 && TYPE_MODE (TREE_TYPE (valid_vecs
[i
+ 1])) == mode
);
2171 /* Referring to first valid VECTOR with this mode, generate the
2172 BIT_FIELD_REF statements accordingly. */
2173 info_ptr
= *(v_info_map
.get (tvec
));
2175 tree elem_type
= TREE_TYPE (vec_type
);
2176 FOR_EACH_VEC_ELT (info_ptr
->vec
, j
, elem
)
2179 tree dst
= make_ssa_name (elem_type
);
2180 tree pos
= bitsize_int (elem
->first
2181 * tree_to_uhwi (TYPE_SIZE (elem_type
)));
2182 tree bfr
= build3 (BIT_FIELD_REF
, elem_type
, sum_vec
,
2183 TYPE_SIZE (elem_type
), pos
);
2184 gimple
*gs
= gimple_build_assign (dst
, BIT_FIELD_REF
, bfr
);
2185 insert_stmt_after (gs
, sum
);
2186 gimple
*def
= SSA_NAME_DEF_STMT ((*ops
)[idx
]->op
);
      /* Set this so that the op definition will get DCEd later.  */
2188 gimple_set_visited (def
, true);
2189 (*ops
)[idx
]->op
= gimple_assign_lhs (gs
);
2190 (*ops
)[idx
]->rank
= get_rank ((*ops
)[idx
]->op
);
2191 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2193 fprintf (dump_file
, "Generating bit_field_ref -> ");
2194 print_gimple_stmt (dump_file
, gs
, 0);
2199 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
    fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2202 cleanup_vinfo_map (v_info_map
);
/* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
   expression, examine the other OPS to see if any of them are comparisons
   of the same values, which we may be able to combine or eliminate.
   For example, we can rewrite (a < b) | (a == b) as (a <= b).  */
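/* An analogous example for the BIT_AND_EXPR case (illustrative; the actual
   folding is delegated to maybe_fold_and_comparisons below):
   (a <= b) & (a >= b) can be rewritten as (a == b).  */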
2213 eliminate_redundant_comparison (enum tree_code opcode
,
2214 vec
<operand_entry
*> *ops
,
2215 unsigned int currindex
,
2216 operand_entry
*curr
)
2219 enum tree_code lcode
, rcode
;
2220 gimple
*def1
, *def2
;
2224 if (opcode
!= BIT_IOR_EXPR
&& opcode
!= BIT_AND_EXPR
)
2227 /* Check that CURR is a comparison. */
2228 if (TREE_CODE (curr
->op
) != SSA_NAME
)
2230 def1
= SSA_NAME_DEF_STMT (curr
->op
);
2231 if (!is_gimple_assign (def1
))
2233 lcode
= gimple_assign_rhs_code (def1
);
2234 if (TREE_CODE_CLASS (lcode
) != tcc_comparison
)
2236 op1
= gimple_assign_rhs1 (def1
);
2237 op2
= gimple_assign_rhs2 (def1
);
2239 /* Now look for a similar comparison in the remaining OPS. */
2240 for (i
= currindex
+ 1; ops
->iterate (i
, &oe
); i
++)
2244 if (TREE_CODE (oe
->op
) != SSA_NAME
)
2246 def2
= SSA_NAME_DEF_STMT (oe
->op
);
2247 if (!is_gimple_assign (def2
))
2249 rcode
= gimple_assign_rhs_code (def2
);
2250 if (TREE_CODE_CLASS (rcode
) != tcc_comparison
)
      /* If we got here, we have a match.  See if we can combine the
	 two comparisons.  */
2255 tree type
= TREE_TYPE (gimple_assign_lhs (def1
));
2256 if (opcode
== BIT_IOR_EXPR
)
2257 t
= maybe_fold_or_comparisons (type
,
2259 rcode
, gimple_assign_rhs1 (def2
),
2260 gimple_assign_rhs2 (def2
));
2262 t
= maybe_fold_and_comparisons (type
,
2264 rcode
, gimple_assign_rhs1 (def2
),
2265 gimple_assign_rhs2 (def2
));
      /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
	 always give us a boolean_type_node value back.  If the original
	 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
	 we need to convert.  */
2273 if (!useless_type_conversion_p (TREE_TYPE (curr
->op
), TREE_TYPE (t
)))
2275 if (!fold_convertible_p (TREE_TYPE (curr
->op
), t
))
2277 t
= fold_convert (TREE_TYPE (curr
->op
), t
);
2280 if (TREE_CODE (t
) != INTEGER_CST
2281 && !operand_equal_p (t
, curr
->op
, 0))
2283 enum tree_code subcode
;
2284 tree newop1
, newop2
;
2285 if (!COMPARISON_CLASS_P (t
))
2287 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2288 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2289 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2290 if (!is_gimple_val (newop1
) || !is_gimple_val (newop2
))
2292 if (lcode
== TREE_CODE (t
)
2293 && operand_equal_p (op1
, newop1
, 0)
2294 && operand_equal_p (op2
, newop2
, 0))
2296 else if ((TREE_CODE (newop1
) == SSA_NAME
2297 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1
))
2298 || (TREE_CODE (newop2
) == SSA_NAME
2299 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2
)))
2303 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2305 fprintf (dump_file
, "Equivalence: ");
2306 print_generic_expr (dump_file
, curr
->op
);
2307 fprintf (dump_file
, " %s ", op_symbol_code (opcode
));
2308 print_generic_expr (dump_file
, oe
->op
);
2309 fprintf (dump_file
, " -> ");
2310 print_generic_expr (dump_file
, t
);
2311 fprintf (dump_file
, "\n");
      /* Now we can delete oe, as it has been subsumed by the new combined
	 expression t.  */
2316 ops
->ordered_remove (i
);
2317 reassociate_stats
.ops_eliminated
++;
      /* If t is the same as curr->op, we're done.  Otherwise we must
	 replace curr->op with t.  Special case is if we got a constant
	 back, in which case we add it to the end instead of in place of
	 the current entry.  */
2323 if (TREE_CODE (t
) == INTEGER_CST
)
2325 ops
->ordered_remove (currindex
);
2326 add_to_ops_vec (ops
, t
);
2328 else if (!operand_equal_p (t
, curr
->op
, 0))
2331 enum tree_code subcode
;
2334 gcc_assert (COMPARISON_CLASS_P (t
));
2335 extract_ops_from_tree (t
, &subcode
, &newop1
, &newop2
);
2336 STRIP_USELESS_TYPE_CONVERSION (newop1
);
2337 STRIP_USELESS_TYPE_CONVERSION (newop2
);
2338 gcc_checking_assert (is_gimple_val (newop1
)
2339 && is_gimple_val (newop2
));
2340 sum
= build_and_add_sum (TREE_TYPE (t
), newop1
, newop2
, subcode
);
2341 curr
->op
= gimple_get_lhs (sum
);
/* Transform repeated addition of same values into multiply with
   count.  */
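/* For example (illustrative): x + x + x + x in OPS is replaced by a new
   statement tmp = x * 4 and the single entry tmp, which is what the loop
   below emits via MULT_EXPR and add_to_ops_vec.  */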
2353 transform_add_to_multiply (vec
<operand_entry
*> *ops
)
2356 tree op
= NULL_TREE
;
2358 int i
, start
= -1, end
= 0, count
= 0;
2359 auto_vec
<std::pair
<int, int> > indxs
;
2360 bool changed
= false;
2362 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2363 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops
)[0]->op
))
2364 || !flag_unsafe_math_optimizations
))
2367 /* Look for repeated operands. */
2368 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
2376 else if (operand_equal_p (oe
->op
, op
, 0))
2384 indxs
.safe_push (std::make_pair (start
, end
));
2392 indxs
.safe_push (std::make_pair (start
, end
));
2394 for (j
= indxs
.length () - 1; j
>= 0; --j
)
2396 /* Convert repeated operand addition to multiplication. */
2397 start
= indxs
[j
].first
;
2398 end
= indxs
[j
].second
;
2399 op
= (*ops
)[start
]->op
;
2400 count
= end
- start
+ 1;
2401 for (i
= end
; i
>= start
; --i
)
2402 ops
->unordered_remove (i
);
2403 tree tmp
= make_ssa_name (TREE_TYPE (op
));
2404 tree cst
= build_int_cst (integer_type_node
, count
);
2406 = gimple_build_assign (tmp
, MULT_EXPR
,
2407 op
, fold_convert (TREE_TYPE (op
), cst
));
2408 gimple_set_visited (mul_stmt
, true);
2409 add_to_ops_vec (ops
, tmp
, mul_stmt
);
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  */
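/* The individual steps below are: merging of trailing constants, the
   eliminate_using_constants identities, and the pairwise eliminations
   (eliminate_not_pairs, eliminate_duplicate_pair, eliminate_plus_minus_pair
   and eliminate_redundant_comparison), iterating while progress is made.  */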
2422 optimize_ops_list (enum tree_code opcode
,
2423 vec
<operand_entry
*> *ops
)
2425 unsigned int length
= ops
->length ();
2428 operand_entry
*oelast
= NULL
;
2429 bool iterate
= false;
2434 oelast
= ops
->last ();
2436 /* If the last two are constants, pop the constants off, merge them
2437 and try the next two. */
2438 if (oelast
->rank
== 0 && is_gimple_min_invariant (oelast
->op
))
2440 operand_entry
*oelm1
= (*ops
)[length
- 2];
2442 if (oelm1
->rank
== 0
2443 && is_gimple_min_invariant (oelm1
->op
)
2444 && useless_type_conversion_p (TREE_TYPE (oelm1
->op
),
2445 TREE_TYPE (oelast
->op
)))
2447 tree folded
= fold_binary (opcode
, TREE_TYPE (oelm1
->op
),
2448 oelm1
->op
, oelast
->op
);
2450 if (folded
&& is_gimple_min_invariant (folded
))
2452 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2453 fprintf (dump_file
, "Merging constants\n");
2458 add_to_ops_vec (ops
, folded
);
2459 reassociate_stats
.constants_eliminated
++;
2461 optimize_ops_list (opcode
, ops
);
2467 eliminate_using_constants (opcode
, ops
);
2470 for (i
= 0; ops
->iterate (i
, &oe
);)
2474 if (eliminate_not_pairs (opcode
, ops
, i
, oe
))
2476 if (eliminate_duplicate_pair (opcode
, ops
, &done
, i
, oe
, oelast
)
2477 || (!done
&& eliminate_plus_minus_pair (opcode
, ops
, i
, oe
))
2478 || (!done
&& eliminate_redundant_comparison (opcode
, ops
, i
, oe
)))
2491 optimize_ops_list (opcode
, ops
);
/* The following functions are subroutines to optimize_range_tests and allow
   it to try to change a logical combination of comparisons into a range
   test.

   For example, both
	X == 2 || X == 5 || X == 3 || X == 4
   and
	X >= 2 && X <= 5
   are converted to
	(unsigned) (X - 2) <= 3

   For more information see comments above fold_test_range in fold-const.cc,
   this implementation is for GIMPLE.  */
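/* Why the unsigned form works (illustrative): for X < 2 the subtraction
   X - 2 wraps around to a value near the top of the unsigned range, so
   (unsigned) (X - 2) <= 3 is true exactly for X in { 2, 3, 4, 5 }.  */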
2510 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2513 dump_range_entry (FILE *file
, struct range_entry
*r
, bool skip_exp
)
2516 print_generic_expr (file
, r
->exp
);
2517 fprintf (file
, " %c[", r
->in_p
? '+' : '-');
2518 print_generic_expr (file
, r
->low
);
2520 print_generic_expr (file
, r
->high
);
2524 /* Dump the range entry R to STDERR. */
2527 debug_range_entry (struct range_entry
*r
)
2529 dump_range_entry (stderr
, r
, false);
2530 fputc ('\n', stderr
);
/* This is similar to make_range in fold-const.cc, but on top of
   GIMPLE instead of trees.  If EXP is non-NULL, it should be
   an SSA_NAME and STMT argument is ignored, otherwise STMT
   argument should be a GIMPLE_COND.  */
2539 init_range_entry (struct range_entry
*r
, tree exp
, gimple
*stmt
)
2543 bool is_bool
, strict_overflow_p
;
2547 r
->strict_overflow_p
= false;
2549 r
->high
= NULL_TREE
;
2550 if (exp
!= NULL_TREE
2551 && (TREE_CODE (exp
) != SSA_NAME
|| !INTEGRAL_TYPE_P (TREE_TYPE (exp
))))
2554 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2555 and see if we can refine the range. Some of the cases below may not
2556 happen, but it doesn't seem worth worrying about this. We "continue"
2557 the outer loop when we've changed something; otherwise we "break"
2558 the switch, which will "break" the while. */
2559 low
= exp
? build_int_cst (TREE_TYPE (exp
), 0) : boolean_false_node
;
2562 strict_overflow_p
= false;
2564 if (exp
== NULL_TREE
)
2566 else if (TYPE_PRECISION (TREE_TYPE (exp
)) == 1)
2568 if (TYPE_UNSIGNED (TREE_TYPE (exp
)))
2573 else if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
)
2578 enum tree_code code
;
2579 tree arg0
, arg1
, exp_type
;
2583 if (exp
!= NULL_TREE
)
2585 if (TREE_CODE (exp
) != SSA_NAME
2586 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
))
2589 stmt
= SSA_NAME_DEF_STMT (exp
);
2590 if (!is_gimple_assign (stmt
))
2593 code
= gimple_assign_rhs_code (stmt
);
2594 arg0
= gimple_assign_rhs1 (stmt
);
2595 arg1
= gimple_assign_rhs2 (stmt
);
2596 exp_type
= TREE_TYPE (exp
);
2600 code
= gimple_cond_code (stmt
);
2601 arg0
= gimple_cond_lhs (stmt
);
2602 arg1
= gimple_cond_rhs (stmt
);
2603 exp_type
= boolean_type_node
;
2606 if (TREE_CODE (arg0
) != SSA_NAME
2607 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0
)
2608 || ssa_name_maybe_undef_p (arg0
))
2610 loc
= gimple_location (stmt
);
2614 if (TREE_CODE (TREE_TYPE (exp
)) == BOOLEAN_TYPE
2615 /* Ensure the range is either +[-,0], +[0,0],
2616 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2617 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2618 or similar expression of unconditional true or
2619 false, it should not be negated. */
2620 && ((high
&& integer_zerop (high
))
2621 || (low
&& integer_onep (low
))))
2634 if ((TYPE_PRECISION (exp_type
) == 1
2635 || TREE_CODE (exp_type
) == BOOLEAN_TYPE
)
2636 && TYPE_PRECISION (TREE_TYPE (arg0
)) > 1)
2639 else if (TYPE_PRECISION (TREE_TYPE (arg0
)) == 1)
2641 if (TYPE_UNSIGNED (TREE_TYPE (arg0
)))
2646 else if (TREE_CODE (TREE_TYPE (arg0
)) == BOOLEAN_TYPE
)
2661 nexp
= make_range_step (loc
, code
, arg0
, arg1
, exp_type
,
2663 &strict_overflow_p
);
2664 if (nexp
!= NULL_TREE
)
2667 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
2680 r
->strict_overflow_p
= strict_overflow_p
;
/* Comparison function for qsort.  Sort entries
   without SSA_NAME exp first, then with SSA_NAMEs sorted
   by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
   by increasing ->low and if ->low is the same, by increasing
   ->high.  ->low == NULL_TREE means minimum, ->high == NULL_TREE
   means maximum.  */
2692 range_entry_cmp (const void *a
, const void *b
)
2694 const struct range_entry
*p
= (const struct range_entry
*) a
;
2695 const struct range_entry
*q
= (const struct range_entry
*) b
;
2697 if (p
->exp
!= NULL_TREE
&& TREE_CODE (p
->exp
) == SSA_NAME
)
2699 if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2701 /* Group range_entries for the same SSA_NAME together. */
2702 if (SSA_NAME_VERSION (p
->exp
) < SSA_NAME_VERSION (q
->exp
))
2704 else if (SSA_NAME_VERSION (p
->exp
) > SSA_NAME_VERSION (q
->exp
))
      /* If ->low is different, NULL low goes first, then by
	 ascending low.  */
2708 if (p
->low
!= NULL_TREE
)
2710 if (q
->low
!= NULL_TREE
)
2712 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2714 if (tem
&& integer_onep (tem
))
2716 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2718 if (tem
&& integer_onep (tem
))
2724 else if (q
->low
!= NULL_TREE
)
      /* If ->high is different, NULL high goes last, before that by
	 ascending high.  */
2728 if (p
->high
!= NULL_TREE
)
2730 if (q
->high
!= NULL_TREE
)
2732 tree tem
= fold_binary (LT_EXPR
, boolean_type_node
,
2734 if (tem
&& integer_onep (tem
))
2736 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
2738 if (tem
&& integer_onep (tem
))
2744 else if (q
->high
!= NULL_TREE
)
2746 /* If both ranges are the same, sort below by ascending idx. */
2751 else if (q
->exp
!= NULL_TREE
&& TREE_CODE (q
->exp
) == SSA_NAME
)
2754 if (p
->idx
< q
->idx
)
2758 gcc_checking_assert (p
->idx
> q
->idx
);
2763 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2764 insert needed statements BEFORE or after GSI. */
2767 force_into_ssa_name (gimple_stmt_iterator
*gsi
, tree expr
, bool before
)
2769 enum gsi_iterator_update m
= before
? GSI_SAME_STMT
: GSI_CONTINUE_LINKING
;
2770 tree ret
= force_gimple_operand_gsi (gsi
, expr
, true, NULL_TREE
, before
, m
);
2771 if (TREE_CODE (ret
) != SSA_NAME
)
2773 gimple
*g
= gimple_build_assign (make_ssa_name (TREE_TYPE (ret
)), ret
);
2775 gsi_insert_before (gsi
, g
, GSI_SAME_STMT
);
2777 gsi_insert_after (gsi
, g
, GSI_CONTINUE_LINKING
);
2778 ret
= gimple_assign_lhs (g
);
/* Helper routine of optimize_range_tests.
   [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
   RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
   OPCODE and OPS are arguments of optimize_range_tests.  If OTHERRANGE
   is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
   an array of COUNT pointers to other ranges.  Return
   true if the range merge has been successful.
   If OPCODE is ERROR_MARK, this is called from within
   maybe_optimize_range_tests and is performing inter-bb range optimization.
   In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
   oe->rank.  */
2796 update_range_test (struct range_entry
*range
, struct range_entry
*otherrange
,
2797 struct range_entry
**otherrangep
,
2798 unsigned int count
, enum tree_code opcode
,
2799 vec
<operand_entry
*> *ops
, tree exp
, gimple_seq seq
,
2800 bool in_p
, tree low
, tree high
, bool strict_overflow_p
)
2802 unsigned int idx
= range
->idx
;
2803 struct range_entry
*swap_with
= NULL
;
2804 basic_block rewrite_bb_first
= NULL
, rewrite_bb_last
= NULL
;
2805 if (opcode
== ERROR_MARK
)
2807 /* For inter-bb range test optimization, pick from the range tests
2808 the one which is tested in the earliest condition (one dominating
2809 the others), because otherwise there could be some UB (e.g. signed
2810 overflow) in following bbs that we'd expose which wasn't there in
2811 the original program. See PR104196. */
2812 basic_block orig_range_bb
= BASIC_BLOCK_FOR_FN (cfun
, (*ops
)[idx
]->id
);
2813 basic_block range_bb
= orig_range_bb
;
2814 for (unsigned int i
= 0; i
< count
; i
++)
2816 struct range_entry
*this_range
;
2818 this_range
= otherrange
+ i
;
2820 this_range
= otherrangep
[i
];
2821 operand_entry
*oe
= (*ops
)[this_range
->idx
];
2822 basic_block this_bb
= BASIC_BLOCK_FOR_FN (cfun
, oe
->id
);
2823 if (range_bb
!= this_bb
2824 && dominated_by_p (CDI_DOMINATORS
, range_bb
, this_bb
))
2826 swap_with
= this_range
;
2828 idx
= this_range
->idx
;
2831 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2832 only defined in later blocks. In this case we can't move the
2833 merged comparison earlier, so instead check if there are any stmts
2834 that might trigger signed integer overflow in between and rewrite
2835 them. But only after we check if the optimization is possible. */
2836 if (seq
&& swap_with
)
2838 rewrite_bb_first
= range_bb
;
2839 rewrite_bb_last
= orig_range_bb
;
2844 operand_entry
*oe
= (*ops
)[idx
];
2846 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
2847 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
2848 location_t loc
= gimple_location (stmt
);
2849 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
2850 tree tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
2852 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
2853 gimple_stmt_iterator gsi
;
2854 unsigned int i
, uid
;
2856 if (tem
== NULL_TREE
)
  /* If op is default def SSA_NAME, there is no place to insert the
     new comparison.  Give up, unless we can use OP itself as the
     range test.  */
2862 if (op
&& SSA_NAME_IS_DEFAULT_DEF (op
))
2864 if (op
== range
->exp
2865 && ((TYPE_PRECISION (optype
) == 1 && TYPE_UNSIGNED (optype
))
2866 || TREE_CODE (optype
) == BOOLEAN_TYPE
)
2868 || (TREE_CODE (tem
) == EQ_EXPR
2869 && TREE_OPERAND (tem
, 0) == op
2870 && integer_onep (TREE_OPERAND (tem
, 1))))
2871 && opcode
!= BIT_IOR_EXPR
2872 && (opcode
!= ERROR_MARK
|| oe
->rank
!= BIT_IOR_EXPR
))
2882 std::swap (range
->idx
, swap_with
->idx
);
2884 if (strict_overflow_p
&& issue_strict_overflow_warning (wc
))
2885 warning_at (loc
, OPT_Wstrict_overflow
,
2886 "assuming signed overflow does not occur "
2887 "when simplifying range test");
2889 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2891 struct range_entry
*r
;
2892 fprintf (dump_file
, "Optimizing range tests ");
2893 dump_range_entry (dump_file
, range
, false);
2894 for (i
= 0; i
< count
; i
++)
2901 && r
->exp
!= range
->exp
2902 && TREE_CODE (r
->exp
) == SSA_NAME
)
2904 fprintf (dump_file
, " and ");
2905 dump_range_entry (dump_file
, r
, false);
2909 fprintf (dump_file
, " and");
2910 dump_range_entry (dump_file
, r
, true);
2913 fprintf (dump_file
, "\n into ");
2914 print_generic_expr (dump_file
, tem
);
2915 fprintf (dump_file
, "\n");
2918 /* In inter-bb range optimization mode, if we have a seq, we can't
2919 move the merged comparison to the earliest bb from the comparisons
2920 being replaced, so instead rewrite stmts that could trigger signed
2921 integer overflow. */
2922 for (basic_block bb
= rewrite_bb_last
;
2923 bb
!= rewrite_bb_first
; bb
= single_pred (bb
))
2924 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
);
2925 !gsi_end_p (gsi
); gsi_next (&gsi
))
2927 gimple
*stmt
= gsi_stmt (gsi
);
2928 if (is_gimple_assign (stmt
))
2929 if (tree lhs
= gimple_assign_lhs (stmt
))
2930 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
2931 || POINTER_TYPE_P (TREE_TYPE (lhs
)))
2932 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
)))
2934 enum tree_code code
= gimple_assign_rhs_code (stmt
);
2935 if (arith_code_with_undefined_signed_overflow (code
))
2937 gimple_stmt_iterator gsip
= gsi
;
2938 gimple_stmt_iterator gsin
= gsi
;
2941 rewrite_to_defined_overflow (&gsi
);
2942 unsigned uid
= gimple_uid (stmt
);
2943 if (gsi_end_p (gsip
))
2944 gsip
= gsi_after_labels (bb
);
2947 for (; gsi_stmt (gsip
) != gsi_stmt (gsin
);
2949 gimple_set_uid (gsi_stmt (gsip
), uid
);
2954 if (opcode
== BIT_IOR_EXPR
2955 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
2956 tem
= invert_truthvalue_loc (loc
, tem
);
2958 tem
= fold_convert_loc (loc
, optype
, tem
);
2961 gsi
= gsi_for_stmt (stmt
);
2962 uid
= gimple_uid (stmt
);
2970 gcc_checking_assert (tem
== op
);
  /* In rare cases range->exp can be equal to lhs of stmt.
     In that case we have to insert after the stmt rather than before
     it.  If stmt is a PHI, insert it at the start of the basic block.  */
2974 else if (op
!= range
->exp
)
2976 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
2977 tem
= force_into_ssa_name (&gsi
, tem
, true);
2980 else if (gimple_code (stmt
) != GIMPLE_PHI
)
2982 gsi_insert_seq_after (&gsi
, seq
, GSI_CONTINUE_LINKING
);
2983 tem
= force_into_ssa_name (&gsi
, tem
, false);
2987 gsi
= gsi_after_labels (gimple_bb (stmt
));
2988 if (!gsi_end_p (gsi
))
2989 uid
= gimple_uid (gsi_stmt (gsi
));
2992 gsi
= gsi_start_bb (gimple_bb (stmt
));
2994 while (!gsi_end_p (gsi
))
2996 uid
= gimple_uid (gsi_stmt (gsi
));
3000 gsi_insert_seq_before (&gsi
, seq
, GSI_SAME_STMT
);
3001 tem
= force_into_ssa_name (&gsi
, tem
, true);
3002 if (gsi_end_p (gsi
))
3003 gsi
= gsi_last_bb (gimple_bb (stmt
));
3007 for (; !gsi_end_p (gsi
); gsi_prev (&gsi
))
3008 if (gimple_uid (gsi_stmt (gsi
)))
3011 gimple_set_uid (gsi_stmt (gsi
), uid
);
3018 range
->strict_overflow_p
= false;
3020 for (i
= 0; i
< count
; i
++)
3023 range
= otherrange
+ i
;
3025 range
= otherrangep
[i
];
3026 oe
= (*ops
)[range
->idx
];
3027 /* Now change all the other range test immediate uses, so that
3028 those tests will be optimized away. */
3029 if (opcode
== ERROR_MARK
)
3032 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
3033 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
3035 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
3036 ? boolean_false_node
: boolean_true_node
);
3039 oe
->op
= error_mark_node
;
3040 range
->exp
= NULL_TREE
;
3041 range
->low
= NULL_TREE
;
3042 range
->high
= NULL_TREE
;
/* Optimize X == CST1 || X == CST2
   if popcount (CST1 ^ CST2) == 1 into
   (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
   Similarly for ranges.  E.g.
   X != 2 && X != 3 && X != 10 && X != 11
   will be transformed by the previous optimization into
   !((X - 2U) <= 1U || (X - 10U) <= 1U)
   and this loop can transform that into
   !(((X & ~8) - 2U) <= 1U).  */
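/* Illustration of the example above: the two ranges [2, 3] and [10, 11]
   differ only in bit 3 (2 ^ 10 == 8, a single bit), so clearing that bit
   with X & ~8 maps both onto [2, 3] and one comparison suffices.  */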
3058 optimize_range_tests_xor (enum tree_code opcode
, tree type
,
3059 tree lowi
, tree lowj
, tree highi
, tree highj
,
3060 vec
<operand_entry
*> *ops
,
3061 struct range_entry
*rangei
,
3062 struct range_entry
*rangej
)
3064 tree lowxor
, highxor
, tem
, exp
;
3065 /* Check lowi ^ lowj == highi ^ highj and
3066 popcount (lowi ^ lowj) == 1. */
3067 lowxor
= fold_binary (BIT_XOR_EXPR
, type
, lowi
, lowj
);
3068 if (lowxor
== NULL_TREE
|| TREE_CODE (lowxor
) != INTEGER_CST
)
3070 if (!integer_pow2p (lowxor
))
3072 highxor
= fold_binary (BIT_XOR_EXPR
, type
, highi
, highj
);
3073 if (!tree_int_cst_equal (lowxor
, highxor
))
3077 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3078 int prec
= GET_MODE_PRECISION (mode
);
3079 if (TYPE_PRECISION (type
) < prec
3080 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3081 != wi::min_value (prec
, TYPE_SIGN (type
)))
3082 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3083 != wi::max_value (prec
, TYPE_SIGN (type
))))
3085 type
= build_nonstandard_integer_type (prec
, TYPE_UNSIGNED (type
));
3086 exp
= fold_convert (type
, exp
);
3087 lowxor
= fold_convert (type
, lowxor
);
3088 lowi
= fold_convert (type
, lowi
);
3089 highi
= fold_convert (type
, highi
);
3091 tem
= fold_build1 (BIT_NOT_EXPR
, type
, lowxor
);
3092 exp
= fold_build2 (BIT_AND_EXPR
, type
, exp
, tem
);
3093 lowj
= fold_build2 (BIT_AND_EXPR
, type
, lowi
, tem
);
3094 highj
= fold_build2 (BIT_AND_EXPR
, type
, highi
, tem
);
3095 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, exp
,
3096 NULL
, rangei
->in_p
, lowj
, highj
,
3097 rangei
->strict_overflow_p
3098 || rangej
->strict_overflow_p
))
/* Optimize X == CST1 || X == CST2
   if popcount (CST2 - CST1) == 1 into
   ((X - CST1) & ~(CST2 - CST1)) == 0.
   Similarly for ranges.  E.g.
   X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
   || X == 75 || X == 45
   will be transformed by the previous optimization into
   (X - 43U) <= 3U || (X - 75U) <= 3U
   and this loop can transform that into
   ((X - 43U) & ~(75U - 43U)) <= 3U.  */
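/* Illustration of the example above: 75U - 43U == 32, a single bit, so
   ((X - 43U) & ~32U) maps both [43, 46] and [75, 78] onto [0, 3] relative
   to 43 and a single comparison against 3U suffices.  */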
3114 optimize_range_tests_diff (enum tree_code opcode
, tree type
,
3115 tree lowi
, tree lowj
, tree highi
, tree highj
,
3116 vec
<operand_entry
*> *ops
,
3117 struct range_entry
*rangei
,
3118 struct range_entry
*rangej
)
3120 tree tem1
, tem2
, mask
;
3121 /* Check highi - lowi == highj - lowj. */
3122 tem1
= fold_binary (MINUS_EXPR
, type
, highi
, lowi
);
3123 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3125 tem2
= fold_binary (MINUS_EXPR
, type
, highj
, lowj
);
3126 if (!tree_int_cst_equal (tem1
, tem2
))
3128 /* Check popcount (lowj - lowi) == 1. */
3129 tem1
= fold_binary (MINUS_EXPR
, type
, lowj
, lowi
);
3130 if (tem1
== NULL_TREE
|| TREE_CODE (tem1
) != INTEGER_CST
)
3132 if (!integer_pow2p (tem1
))
3135 scalar_int_mode mode
= as_a
<scalar_int_mode
> (TYPE_MODE (type
));
3136 int prec
= GET_MODE_PRECISION (mode
);
3137 if (TYPE_PRECISION (type
) < prec
3138 || (wi::to_wide (TYPE_MIN_VALUE (type
))
3139 != wi::min_value (prec
, TYPE_SIGN (type
)))
3140 || (wi::to_wide (TYPE_MAX_VALUE (type
))
3141 != wi::max_value (prec
, TYPE_SIGN (type
))))
3142 type
= build_nonstandard_integer_type (prec
, 1);
3144 type
= unsigned_type_for (type
);
3145 tem1
= fold_convert (type
, tem1
);
3146 tem2
= fold_convert (type
, tem2
);
3147 lowi
= fold_convert (type
, lowi
);
3148 mask
= fold_build1 (BIT_NOT_EXPR
, type
, tem1
);
3149 tem1
= fold_build2 (MINUS_EXPR
, type
,
3150 fold_convert (type
, rangei
->exp
), lowi
);
3151 tem1
= fold_build2 (BIT_AND_EXPR
, type
, tem1
, mask
);
3152 lowj
= build_int_cst (type
, 0);
3153 if (update_range_test (rangei
, rangej
, NULL
, 1, opcode
, ops
, tem1
,
3154 NULL
, rangei
->in_p
, lowj
, tem2
,
3155 rangei
->strict_overflow_p
3156 || rangej
->strict_overflow_p
))
/* Perform the common checks for optimize_range_tests_xor and
   optimize_range_tests_diff.
   If OPTIMIZE_XOR is TRUE, call optimize_range_tests_xor,
   otherwise call optimize_range_tests_diff.  */
3167 optimize_range_tests_1 (enum tree_code opcode
, int first
, int length
,
3168 bool optimize_xor
, vec
<operand_entry
*> *ops
,
3169 struct range_entry
*ranges
)
3172 bool any_changes
= false;
3173 for (i
= first
; i
< length
; i
++)
3175 tree lowi
, highi
, lowj
, highj
, type
, tem
;
3177 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3179 type
= TREE_TYPE (ranges
[i
].exp
);
3180 if (!INTEGRAL_TYPE_P (type
))
3182 lowi
= ranges
[i
].low
;
3183 if (lowi
== NULL_TREE
)
3184 lowi
= TYPE_MIN_VALUE (type
);
3185 highi
= ranges
[i
].high
;
3186 if (highi
== NULL_TREE
)
3188 for (j
= i
+ 1; j
< length
&& j
< i
+ 64; j
++)
3191 if (ranges
[i
].exp
!= ranges
[j
].exp
|| ranges
[j
].in_p
)
3193 lowj
= ranges
[j
].low
;
3194 if (lowj
== NULL_TREE
)
3196 highj
= ranges
[j
].high
;
3197 if (highj
== NULL_TREE
)
3198 highj
= TYPE_MAX_VALUE (type
);
3199 /* Check lowj > highi. */
3200 tem
= fold_binary (GT_EXPR
, boolean_type_node
,
3202 if (tem
== NULL_TREE
|| !integer_onep (tem
))
3205 changes
= optimize_range_tests_xor (opcode
, type
, lowi
, lowj
,
3207 ranges
+ i
, ranges
+ j
);
3209 changes
= optimize_range_tests_diff (opcode
, type
, lowi
, lowj
,
3211 ranges
+ i
, ranges
+ j
);
/* Helper function of optimize_range_tests_to_bit_test.  Handle a single
   range, EXP, LOW, HIGH, compute bit mask of bits to test and return
   EXP on success, NULL otherwise.  */
3227 extract_bit_test_mask (tree exp
, int prec
, tree totallow
, tree low
, tree high
,
3228 wide_int
*mask
, tree
*totallowp
)
3230 tree tem
= int_const_binop (MINUS_EXPR
, high
, low
);
3231 if (tem
== NULL_TREE
3232 || TREE_CODE (tem
) != INTEGER_CST
3233 || TREE_OVERFLOW (tem
)
3234 || tree_int_cst_sgn (tem
) == -1
3235 || compare_tree_int (tem
, prec
) != -1)
3238 unsigned HOST_WIDE_INT max
= tree_to_uhwi (tem
) + 1;
3239 *mask
= wi::shifted_mask (0, max
, false, prec
);
3240 if (TREE_CODE (exp
) == BIT_AND_EXPR
3241 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3243 widest_int msk
= wi::to_widest (TREE_OPERAND (exp
, 1));
3244 msk
= wi::zext (~msk
, TYPE_PRECISION (TREE_TYPE (exp
)));
3245 if (wi::popcount (msk
) == 1
3246 && wi::ltu_p (msk
, prec
- max
))
3248 *mask
|= wi::shifted_mask (msk
.to_uhwi (), max
, false, prec
);
3249 max
+= msk
.to_uhwi ();
3250 exp
= TREE_OPERAND (exp
, 0);
3251 if (integer_zerop (low
)
3252 && TREE_CODE (exp
) == PLUS_EXPR
3253 && TREE_CODE (TREE_OPERAND (exp
, 1)) == INTEGER_CST
)
3255 tree ret
= TREE_OPERAND (exp
, 0);
3258 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp
, 1)),
3259 TYPE_PRECISION (TREE_TYPE (low
))));
3260 tree tbias
= wide_int_to_tree (TREE_TYPE (ret
), bias
);
3266 else if (!tree_int_cst_lt (totallow
, tbias
))
3268 bias
= wi::to_widest (tbias
);
3269 bias
-= wi::to_widest (totallow
);
3270 if (bias
>= 0 && bias
< prec
- max
)
3272 *mask
= wi::lshift (*mask
, bias
);
3280 if (!tree_int_cst_lt (totallow
, low
))
3282 tem
= int_const_binop (MINUS_EXPR
, low
, totallow
);
3283 if (tem
== NULL_TREE
3284 || TREE_CODE (tem
) != INTEGER_CST
3285 || TREE_OVERFLOW (tem
)
3286 || compare_tree_int (tem
, prec
- max
) == 1)
3289 *mask
= wi::lshift (*mask
, wi::to_widest (tem
));
/* Attempt to optimize small range tests using bit test.
   E.g.
   X != 43 && X != 76 && X != 44 && X != 78 && X != 49
   && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
   has been optimized by the earlier optimizations into:
   ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
   As the whole 43 through 82 range spans fewer than 64 numbers,
   for 64-bit word targets optimize that into:
   (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0  */
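/* Illustration of the mask in the example above (bit positions are relative
   to 43): 43, 44, 45, 46 and 49 set bits 0-3 and 6 (0x4FULL), 75 through 78
   set bits 32-35 (0xF00000000ULL) and 82 sets bit 39 (0x8000000000ULL),
   which together give 0x8F0000004FULL.  */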
3304 optimize_range_tests_to_bit_test (enum tree_code opcode
, int first
, int length
,
3305 vec
<operand_entry
*> *ops
,
3306 struct range_entry
*ranges
)
3309 bool any_changes
= false;
3310 int prec
= GET_MODE_BITSIZE (word_mode
);
3311 auto_vec
<struct range_entry
*, 64> candidates
;
3313 for (i
= first
; i
< length
- 1; i
++)
3315 tree lowi
, highi
, lowj
, highj
, type
;
3317 if (ranges
[i
].exp
== NULL_TREE
|| ranges
[i
].in_p
)
3319 type
= TREE_TYPE (ranges
[i
].exp
);
3320 if (!INTEGRAL_TYPE_P (type
))
3322 lowi
= ranges
[i
].low
;
3323 if (lowi
== NULL_TREE
)
3324 lowi
= TYPE_MIN_VALUE (type
);
3325 highi
= ranges
[i
].high
;
3326 if (highi
== NULL_TREE
)
3329 tree exp
= extract_bit_test_mask (ranges
[i
].exp
, prec
, lowi
, lowi
,
3330 highi
, &mask
, &lowi
);
3331 if (exp
== NULL_TREE
)
3333 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
3334 candidates
.truncate (0);
3335 int end
= MIN (i
+ 64, length
);
3336 for (j
= i
+ 1; j
< end
; j
++)
3339 if (ranges
[j
].exp
== NULL_TREE
|| ranges
[j
].in_p
)
3341 if (ranges
[j
].exp
== exp
)
3343 else if (TREE_CODE (ranges
[j
].exp
) == BIT_AND_EXPR
)
3345 exp2
= TREE_OPERAND (ranges
[j
].exp
, 0);
3348 else if (TREE_CODE (exp2
) == PLUS_EXPR
)
3350 exp2
= TREE_OPERAND (exp2
, 0);
3360 lowj
= ranges
[j
].low
;
3361 if (lowj
== NULL_TREE
)
3363 highj
= ranges
[j
].high
;
3364 if (highj
== NULL_TREE
)
3365 highj
= TYPE_MAX_VALUE (type
);
3367 exp2
= extract_bit_test_mask (ranges
[j
].exp
, prec
, lowi
, lowj
,
3368 highj
, &mask2
, NULL
);
3372 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
3373 candidates
.safe_push (&ranges
[j
]);
      /* If every possible relative value of the expression is a valid shift
	 amount, then we can merge the entry test into the bit test.  In this
	 case, if we would otherwise need 2 or more comparisons, then use
	 the bit test; in the other cases, the threshold is 3 comparisons.  */
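      /* For instance (illustrative), if the range query below proves that
	 X - 43U from the example above is always in [0, 63] on a 64-bit
	 word target, every shift amount in 1 << (X - 43U) is well defined
	 and the separate entry comparison can be folded into the bit
	 test.  */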
3380 bool entry_test_needed
;
3382 if (TREE_CODE (exp
) == SSA_NAME
3383 && get_range_query (cfun
)->range_of_expr (r
, exp
)
3384 && !r
.undefined_p ()
3386 && wi::leu_p (r
.upper_bound () - r
.lower_bound (), prec
- 1))
3388 wide_int min
= r
.lower_bound ();
3389 wide_int ilowi
= wi::to_wide (lowi
);
3390 if (wi::lt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3392 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3393 mask
= wi::lshift (mask
, ilowi
- min
);
3395 else if (wi::gt_p (min
, ilowi
, TYPE_SIGN (TREE_TYPE (lowi
))))
3397 lowi
= wide_int_to_tree (TREE_TYPE (lowi
), min
);
3398 mask
= wi::lrshift (mask
, min
- ilowi
);
3400 entry_test_needed
= false;
3403 entry_test_needed
= true;
3404 if (candidates
.length () >= (entry_test_needed
? 2 : 1))
3406 tree high
= wide_int_to_tree (TREE_TYPE (lowi
),
3407 wi::to_widest (lowi
)
3408 + prec
- 1 - wi::clz (mask
));
3409 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3411 gimple
*stmt
= op
? SSA_NAME_DEF_STMT (op
)
3412 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3414 location_t loc
= gimple_location (stmt
);
3415 tree optype
= op
? TREE_TYPE (op
) : boolean_type_node
;
	  /* See if it isn't cheaper to pretend the minimum value of the
	     range is 0, if the maximum value is small enough.
	     We can then avoid the subtraction of the minimum value, but the
	     mask constant might be more expensive.  */
3421 if (compare_tree_int (lowi
, 0) > 0
3422 && compare_tree_int (high
, prec
) < 0
3423 && (entry_test_needed
|| wi::ltu_p (r
.upper_bound (), prec
)))
3426 HOST_WIDE_INT m
= tree_to_uhwi (lowi
);
3427 rtx reg
= gen_raw_REG (word_mode
, 10000);
3428 bool speed_p
= optimize_bb_for_speed_p (gimple_bb (stmt
));
3429 cost_diff
= set_src_cost (gen_rtx_PLUS (word_mode
, reg
,
3431 word_mode
, speed_p
);
3432 rtx r
= immed_wide_int_const (mask
, word_mode
);
3433 cost_diff
+= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3434 word_mode
, speed_p
);
3435 r
= immed_wide_int_const (wi::lshift (mask
, m
), word_mode
);
3436 cost_diff
-= set_src_cost (gen_rtx_AND (word_mode
, reg
, r
),
3437 word_mode
, speed_p
);
3440 mask
= wi::lshift (mask
, m
);
3441 lowi
= build_zero_cst (TREE_TYPE (lowi
));
3446 if (entry_test_needed
)
3448 tem
= build_range_check (loc
, optype
, unshare_expr (exp
),
3450 if (tem
== NULL_TREE
|| is_gimple_val (tem
))
3455 tree etype
= unsigned_type_for (TREE_TYPE (exp
));
3456 exp
= fold_build2_loc (loc
, MINUS_EXPR
, etype
,
3457 fold_convert_loc (loc
, etype
, exp
),
3458 fold_convert_loc (loc
, etype
, lowi
));
3459 exp
= fold_convert_loc (loc
, integer_type_node
, exp
);
3460 tree word_type
= lang_hooks
.types
.type_for_mode (word_mode
, 1);
3461 exp
= fold_build2_loc (loc
, LSHIFT_EXPR
, word_type
,
3462 build_int_cst (word_type
, 1), exp
);
3463 exp
= fold_build2_loc (loc
, BIT_AND_EXPR
, word_type
, exp
,
3464 wide_int_to_tree (word_type
, mask
));
3465 exp
= fold_build2_loc (loc
, EQ_EXPR
, optype
, exp
,
3466 build_zero_cst (word_type
));
3467 if (is_gimple_val (exp
))
	  /* The shift might have undefined behavior if TEM is true,
	     but reassociate_bb isn't prepared to have basic blocks
	     split when it is running.  So, temporarily emit code
	     with BIT_IOR_EXPR instead of &&, and fix it up in
	     branch_fixup.  */
3475 gimple_seq seq
= NULL
;
3478 tem
= force_gimple_operand (tem
, &seq
, true, NULL_TREE
);
3479 gcc_assert (TREE_CODE (tem
) == SSA_NAME
);
3480 gimple_set_visited (SSA_NAME_DEF_STMT (tem
), true);
3483 exp
= force_gimple_operand (exp
, &seq2
, true, NULL_TREE
);
3484 gimple_seq_add_seq_without_update (&seq
, seq2
);
3485 gcc_assert (TREE_CODE (exp
) == SSA_NAME
);
3486 gimple_set_visited (SSA_NAME_DEF_STMT (exp
), true);
3489 gimple
*g
= gimple_build_assign (make_ssa_name (optype
),
3490 BIT_IOR_EXPR
, tem
, exp
);
3491 gimple_set_location (g
, loc
);
3492 gimple_seq_add_stmt_without_update (&seq
, g
);
3493 exp
= gimple_assign_lhs (g
);
3495 tree val
= build_zero_cst (optype
);
3496 if (update_range_test (&ranges
[i
], NULL
, candidates
.address (),
3497 candidates
.length (), opcode
, ops
, exp
,
3498 seq
, false, val
, val
, strict_overflow_p
))
3502 reassoc_branch_fixups
.safe_push (tem
);
3505 gimple_seq_discard (seq
);
/* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
   and similarly x != -1 && y != -1 && z != -1 into (x & y & z) != -1.
   Also, handle x < C && y < C && z < C where C is power of two as
   (x | y | z) < C.  And also handle signed x < 0 && y < 0 && z < 0
   as (x | y | z) < 0.  */
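/* Why the power-of-two case is safe (illustrative): for unsigned values and
   a power-of-two C, x < C just says that no bit at or above log2 (C) is set
   in x, and OR-ing the operands preserves exactly that property; the signed
   x < 0 case is the same statement about the sign bit.  */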
3518 optimize_range_tests_cmp_bitwise (enum tree_code opcode
, int first
, int length
,
3519 vec
<operand_entry
*> *ops
,
3520 struct range_entry
*ranges
)
3524 bool any_changes
= false;
3525 auto_vec
<int, 128> buckets
;
3526 auto_vec
<int, 32> chains
;
3527 auto_vec
<struct range_entry
*, 32> candidates
;
3529 for (i
= first
; i
< length
; i
++)
3533 if (ranges
[i
].exp
== NULL_TREE
3534 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3535 || TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) <= 1
3536 || TREE_CODE (TREE_TYPE (ranges
[i
].exp
)) == BOOLEAN_TYPE
)
3539 if (ranges
[i
].low
!= NULL_TREE
3540 && ranges
[i
].high
!= NULL_TREE
3542 && tree_int_cst_equal (ranges
[i
].low
, ranges
[i
].high
))
3544 idx
= !integer_zerop (ranges
[i
].low
);
3545 if (idx
&& !integer_all_onesp (ranges
[i
].low
))
3548 else if (ranges
[i
].high
!= NULL_TREE
3549 && TREE_CODE (ranges
[i
].high
) == INTEGER_CST
3552 wide_int w
= wi::to_wide (ranges
[i
].high
);
3553 int prec
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
));
3554 int l
= wi::clz (w
);
3558 || w
!= wi::mask (prec
- l
, false, prec
))
3560 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3561 && ranges
[i
].low
== NULL_TREE
)
3563 && integer_zerop (ranges
[i
].low
))))
3566 else if (ranges
[i
].high
== NULL_TREE
3567 && ranges
[i
].low
!= NULL_TREE
3568 /* Perform this optimization only in the last
3569 reassoc pass, as it interferes with the reassociation
3570 itself or could also with VRP etc. which might not
3571 be able to virtually undo the optimization. */
3572 && !reassoc_insert_powi_p
3573 && !TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3574 && integer_zerop (ranges
[i
].low
))
3579 b
= TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) * 4 + idx
;
3580 if (buckets
.length () <= b
)
3581 buckets
.safe_grow_cleared (b
+ 1, true);
3582 if (chains
.length () <= (unsigned) i
)
3583 chains
.safe_grow (i
+ 1, true);
3584 chains
[i
] = buckets
[b
];
3588 FOR_EACH_VEC_ELT (buckets
, b
, i
)
3589 if (i
&& chains
[i
- 1])
3594 /* When ranges[X - 1].high + 1 is a power of two,
3595 we need to process the same bucket up to
3596 precision - 1 times, each time split the entries
3597 with the same high bound into one chain and the
3598 rest into another one to be processed later. */
3601 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3603 if (tree_int_cst_equal (ranges
[i
- 1].high
,
3604 ranges
[j
- 1].high
))
3606 chains
[this_prev
- 1] = j
;
3609 else if (other_prev
== 0)
3616 chains
[other_prev
- 1] = j
;
3620 chains
[this_prev
- 1] = 0;
3622 chains
[other_prev
- 1] = 0;
3623 if (chains
[i
- 1] == 0)
3630 for (j
= chains
[i
- 1]; j
; j
= chains
[j
- 1])
3632 gimple
*gk
= SSA_NAME_DEF_STMT (ranges
[k
- 1].exp
);
3633 gimple
*gj
= SSA_NAME_DEF_STMT (ranges
[j
- 1].exp
);
3634 if (reassoc_stmt_dominates_stmt_p (gk
, gj
))
3637 tree type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3638 tree type2
= NULL_TREE
;
3639 bool strict_overflow_p
= false;
3640 candidates
.truncate (0);
3641 if (POINTER_TYPE_P (type1
) || TREE_CODE (type1
) == OFFSET_TYPE
)
3642 type1
= pointer_sized_int_node
;
3643 for (j
= i
; j
; j
= chains
[j
- 1])
3645 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3646 strict_overflow_p
|= ranges
[j
- 1].strict_overflow_p
;
3647 if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == OFFSET_TYPE
)
3648 type
= pointer_sized_int_node
;
	  /* For the signed < 0 cases, the types should be
	     really compatible (all signed with the same precision,
	     instead put ranges that have different in_p from
	     k first.  */
3655 if (!useless_type_conversion_p (type1
, type
))
3657 if (ranges
[j
- 1].in_p
!= ranges
[k
- 1].in_p
)
3658 candidates
.safe_push (&ranges
[j
- 1]);
3663 || useless_type_conversion_p (type1
, type
))
3665 else if (type2
== NULL_TREE
3666 || useless_type_conversion_p (type2
, type
))
3668 if (type2
== NULL_TREE
)
3670 candidates
.safe_push (&ranges
[j
- 1]);
3673 unsigned l
= candidates
.length ();
3674 for (j
= i
; j
; j
= chains
[j
- 1])
3676 tree type
= TREE_TYPE (ranges
[j
- 1].exp
);
3679 if (POINTER_TYPE_P (type
) || TREE_CODE (type
) == OFFSET_TYPE
)
3680 type
= pointer_sized_int_node
;
3683 if (!useless_type_conversion_p (type1
, type
))
3685 if (ranges
[j
- 1].in_p
== ranges
[k
- 1].in_p
)
3686 candidates
.safe_push (&ranges
[j
- 1]);
3689 if (useless_type_conversion_p (type1
, type
))
3691 else if (type2
== NULL_TREE
3692 || useless_type_conversion_p (type2
, type
))
3694 candidates
.safe_push (&ranges
[j
- 1]);
3696 gimple_seq seq
= NULL
;
3697 tree op
= NULL_TREE
;
3699 struct range_entry
*r
;
3700 candidates
.safe_push (&ranges
[k
- 1]);
3701 FOR_EACH_VEC_ELT (candidates
, id
, r
)
3704 enum tree_code code
;
3711 || POINTER_TYPE_P (TREE_TYPE (op
))
3712 || TREE_CODE (TREE_TYPE (op
)) == OFFSET_TYPE
)
3714 code
= (b
% 4) == 3 ? BIT_NOT_EXPR
: NOP_EXPR
;
3715 tree type3
= id
>= l
? type1
: pointer_sized_int_node
;
3716 if (code
== BIT_NOT_EXPR
3717 && TREE_CODE (TREE_TYPE (op
)) == OFFSET_TYPE
)
3719 g
= gimple_build_assign (make_ssa_name (type3
),
3721 gimple_seq_add_stmt_without_update (&seq
, g
);
3722 op
= gimple_assign_lhs (g
);
3724 g
= gimple_build_assign (make_ssa_name (type3
), code
, op
);
3725 gimple_seq_add_stmt_without_update (&seq
, g
);
3726 op
= gimple_assign_lhs (g
);
3728 tree type
= TREE_TYPE (r
->exp
);
3730 if (POINTER_TYPE_P (type
)
3731 || TREE_CODE (type
) == OFFSET_TYPE
3732 || (id
>= l
&& !useless_type_conversion_p (type1
, type
)))
3734 tree type3
= id
>= l
? type1
: pointer_sized_int_node
;
3735 g
= gimple_build_assign (make_ssa_name (type3
), NOP_EXPR
, exp
);
3736 gimple_seq_add_stmt_without_update (&seq
, g
);
3737 exp
= gimple_assign_lhs (g
);
3740 code
= r
->in_p
? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3742 code
= (b
% 4) == 1 ? BIT_AND_EXPR
: BIT_IOR_EXPR
;
3743 g
= gimple_build_assign (make_ssa_name (id
>= l
? type1
: type2
),
3745 gimple_seq_add_stmt_without_update (&seq
, g
);
3746 op
= gimple_assign_lhs (g
);
3748 type1
= TREE_TYPE (ranges
[k
- 1].exp
);
3749 if (POINTER_TYPE_P (type1
) || TREE_CODE (type1
) == OFFSET_TYPE
)
3752 = gimple_build_assign (make_ssa_name (type1
), NOP_EXPR
, op
);
3753 gimple_seq_add_stmt_without_update (&seq
, g
);
3754 op
= gimple_assign_lhs (g
);
3757 if (update_range_test (&ranges
[k
- 1], NULL
, candidates
.address (),
3758 candidates
.length (), opcode
, ops
, op
,
3759 seq
, ranges
[k
- 1].in_p
, ranges
[k
- 1].low
,
3760 ranges
[k
- 1].high
, strict_overflow_p
))
3763 gimple_seq_discard (seq
);
3764 if ((b
% 4) == 2 && buckets
[b
] != i
)
3765 /* There is more work to do for this bucket. */
/* Attempt to optimize for signed a and b where b is known to be >= 0:
   a >= 0 && a < b into (unsigned) a < (unsigned) b
   a >= 0 && a <= b into (unsigned) a <= (unsigned) b  */
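/* Why a single unsigned comparison is enough (illustrative): with b >= 0,
   (unsigned) b stays below 2^(prec-1), while any negative a becomes
   (unsigned) a >= 2^(prec-1); so (unsigned) a < (unsigned) b is false for
   every negative a and otherwise agrees with the signed a < b.  */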
3777 optimize_range_tests_var_bound (enum tree_code opcode
, int first
, int length
,
3778 vec
<operand_entry
*> *ops
,
3779 struct range_entry
*ranges
,
3780 basic_block first_bb
)
3783 bool any_changes
= false;
3784 hash_map
<tree
, int> *map
= NULL
;
3786 for (i
= first
; i
< length
; i
++)
3788 if (ranges
[i
].exp
== NULL_TREE
3789 || TREE_CODE (ranges
[i
].exp
) != SSA_NAME
3793 tree type
= TREE_TYPE (ranges
[i
].exp
);
3794 if (!INTEGRAL_TYPE_P (type
)
3795 || TYPE_UNSIGNED (type
)
3796 || ranges
[i
].low
== NULL_TREE
3797 || !integer_zerop (ranges
[i
].low
)
3798 || ranges
[i
].high
!= NULL_TREE
)
3800 /* EXP >= 0 here. */
3802 map
= new hash_map
<tree
, int>;
3803 map
->put (ranges
[i
].exp
, i
);
3809 for (i
= 0; i
< length
; i
++)
3811 bool in_p
= ranges
[i
].in_p
;
3812 if (ranges
[i
].low
== NULL_TREE
3813 || ranges
[i
].high
== NULL_TREE
)
3815 if (!integer_zerop (ranges
[i
].low
)
3816 || !integer_zerop (ranges
[i
].high
))
3819 && TYPE_PRECISION (TREE_TYPE (ranges
[i
].exp
)) == 1
3820 && TYPE_UNSIGNED (TREE_TYPE (ranges
[i
].exp
))
3821 && integer_onep (ranges
[i
].low
)
3822 && integer_onep (ranges
[i
].high
))
3833 if (TREE_CODE (ranges
[i
].exp
) != SSA_NAME
)
3835 stmt
= SSA_NAME_DEF_STMT (ranges
[i
].exp
);
3836 if (!is_gimple_assign (stmt
))
3838 ccode
= gimple_assign_rhs_code (stmt
);
3839 rhs1
= gimple_assign_rhs1 (stmt
);
3840 rhs2
= gimple_assign_rhs2 (stmt
);
3844 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
3845 stmt
= last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
));
3846 if (gimple_code (stmt
) != GIMPLE_COND
)
3848 ccode
= gimple_cond_code (stmt
);
3849 rhs1
= gimple_cond_lhs (stmt
);
3850 rhs2
= gimple_cond_rhs (stmt
);
3853 if (TREE_CODE (rhs1
) != SSA_NAME
3854 || rhs2
== NULL_TREE
3855 || TREE_CODE (rhs2
) != SSA_NAME
)
3869 ccode
= invert_tree_comparison (ccode
, false);
3874 std::swap (rhs1
, rhs2
);
3875 ccode
= swap_tree_comparison (ccode
);
3884 int *idx
= map
->get (rhs1
);
3888 /* maybe_optimize_range_tests allows statements without side-effects
3889 in the basic blocks as long as they are consumed in the same bb.
3890 Make sure rhs2's def stmt is not among them, otherwise we can't
3891 use safely get_nonzero_bits on it. E.g. in:
3892 # RANGE [-83, 1] NONZERO 173
3893 # k_32 = PHI <k_47(13), k_12(9)>
3896 goto <bb 5>; [26.46%]
3898 goto <bb 9>; [73.54%]
3900 <bb 5> [local count: 140323371]:
3901 # RANGE [0, 1] NONZERO 1
3903 # RANGE [0, 4] NONZERO 4
3905 # RANGE [0, 4] NONZERO 4
3906 iftmp.0_44 = (char) _21;
3907 if (k_32 < iftmp.0_44)
3908 goto <bb 6>; [84.48%]
3910 goto <bb 9>; [15.52%]
3911 the ranges on _5/_21/iftmp.0_44 are flow sensitive, assume that
3912 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3913 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3914 those stmts even for negative k_32 and the value ranges would be no
3915 longer guaranteed and so the optimization would be invalid. */
3916 while (opcode
== ERROR_MARK
)
3918 gimple
*g
= SSA_NAME_DEF_STMT (rhs2
);
3919 basic_block bb2
= gimple_bb (g
);
3922 && dominated_by_p (CDI_DOMINATORS
, bb2
, first_bb
))
3924 /* As an exception, handle a few common cases. */
3925 if (gimple_assign_cast_p (g
)
3926 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g
))))
3928 tree op0
= gimple_assign_rhs1 (g
);
3929 if (TYPE_UNSIGNED (TREE_TYPE (op0
))
3930 && (TYPE_PRECISION (TREE_TYPE (rhs2
))
3931 > TYPE_PRECISION (TREE_TYPE (op0
))))
3932 /* Zero-extension is always ok. */
3934 else if (TYPE_PRECISION (TREE_TYPE (rhs2
))
3935 == TYPE_PRECISION (TREE_TYPE (op0
))
3936 && TREE_CODE (op0
) == SSA_NAME
)
3938 /* Cast from signed to unsigned or vice versa. Retry
3939 with the op0 as new rhs2. */
3944 else if (is_gimple_assign (g
)
3945 && gimple_assign_rhs_code (g
) == BIT_AND_EXPR
3946 && TREE_CODE (gimple_assign_rhs2 (g
)) == INTEGER_CST
3947 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g
))))
	      /* Masking with INTEGER_CST with MSB clear is always ok
		 too.  */
3955 if (rhs2
== NULL_TREE
)
3958 wide_int nz
= get_nonzero_bits (rhs2
);
3962 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3963 and RHS2 is known to be RHS2 >= 0. */
3964 tree utype
= unsigned_type_for (TREE_TYPE (rhs1
));
3966 enum warn_strict_overflow_code wc
= WARN_STRICT_OVERFLOW_COMPARISON
;
3967 if ((ranges
[*idx
].strict_overflow_p
3968 || ranges
[i
].strict_overflow_p
)
3969 && issue_strict_overflow_warning (wc
))
3970 warning_at (gimple_location (stmt
), OPT_Wstrict_overflow
,
3971 "assuming signed overflow does not occur "
3972 "when simplifying range test");
3974 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3976 struct range_entry
*r
= &ranges
[*idx
];
3977 fprintf (dump_file
, "Optimizing range test ");
3978 print_generic_expr (dump_file
, r
->exp
);
3979 fprintf (dump_file
, " +[");
3980 print_generic_expr (dump_file
, r
->low
);
3981 fprintf (dump_file
, ", ");
3982 print_generic_expr (dump_file
, r
->high
);
3983 fprintf (dump_file
, "] and comparison ");
3984 print_generic_expr (dump_file
, rhs1
);
3985 fprintf (dump_file
, " %s ", op_symbol_code (ccode
));
3986 print_generic_expr (dump_file
, rhs2
);
3987 fprintf (dump_file
, "\n into (");
3988 print_generic_expr (dump_file
, utype
);
3989 fprintf (dump_file
, ") ");
3990 print_generic_expr (dump_file
, rhs1
);
3991 fprintf (dump_file
, " %s (", op_symbol_code (ccode
));
3992 print_generic_expr (dump_file
, utype
);
3993 fprintf (dump_file
, ") ");
3994 print_generic_expr (dump_file
, rhs2
);
3995 fprintf (dump_file
, "\n");
3998 operand_entry
*oe
= (*ops
)[ranges
[i
].idx
];
4000 if (opcode
== BIT_IOR_EXPR
4001 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
4004 ccode
= invert_tree_comparison (ccode
, false);
4007 unsigned int uid
= gimple_uid (stmt
);
4008 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4009 gimple
*g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs1
);
4010 gimple_set_uid (g
, uid
);
4011 rhs1
= gimple_assign_lhs (g
);
4012 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4013 if (!useless_type_conversion_p (utype
, TREE_TYPE (rhs2
)))
4015 g
= gimple_build_assign (make_ssa_name (utype
), NOP_EXPR
, rhs2
);
4016 gimple_set_uid (g
, uid
);
4017 rhs2
= gimple_assign_lhs (g
);
4018 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4020 if (tree_swap_operands_p (rhs1
, rhs2
))
4022 std::swap (rhs1
, rhs2
);
4023 ccode
= swap_tree_comparison (ccode
);
4025 if (gimple_code (stmt
) == GIMPLE_COND
)
4027 gcond
*c
= as_a
<gcond
*> (stmt
);
4028 gimple_cond_set_code (c
, ccode
);
4029 gimple_cond_set_lhs (c
, rhs1
);
4030 gimple_cond_set_rhs (c
, rhs2
);
4035 tree ctype
= oe
->op
? TREE_TYPE (oe
->op
) : boolean_type_node
;
4036 if (!INTEGRAL_TYPE_P (ctype
)
4037 || (TREE_CODE (ctype
) != BOOLEAN_TYPE
4038 && TYPE_PRECISION (ctype
) != 1))
4039 ctype
= boolean_type_node
;
4040 g
= gimple_build_assign (make_ssa_name (ctype
), ccode
, rhs1
, rhs2
);
4041 gimple_set_uid (g
, uid
);
4042 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4043 if (oe
->op
&& ctype
!= TREE_TYPE (oe
->op
))
4045 g
= gimple_build_assign (make_ssa_name (TREE_TYPE (oe
->op
)),
4046 NOP_EXPR
, gimple_assign_lhs (g
));
4047 gimple_set_uid (g
, uid
);
4048 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4050 ranges
[i
].exp
= gimple_assign_lhs (g
);
4051 oe
->op
= ranges
[i
].exp
;
4052 ranges
[i
].low
= build_zero_cst (TREE_TYPE (ranges
[i
].exp
));
4053 ranges
[i
].high
= ranges
[i
].low
;
4055 ranges
[i
].strict_overflow_p
= false;
4056 oe
= (*ops
)[ranges
[*idx
].idx
];
4057 /* Now change all the other range test immediate uses, so that
4058 those tests will be optimized away. */
4059 if (opcode
== ERROR_MARK
)
4062 oe
->op
= build_int_cst (TREE_TYPE (oe
->op
),
4063 oe
->rank
== BIT_IOR_EXPR
? 0 : 1);
4065 oe
->op
= (oe
->rank
== BIT_IOR_EXPR
4066 ? boolean_false_node
: boolean_true_node
);
4069 oe
->op
= error_mark_node
;
4070 ranges
[*idx
].exp
= NULL_TREE
;
4071 ranges
[*idx
].low
= NULL_TREE
;
4072 ranges
[*idx
].high
= NULL_TREE
;
/* Optimize range tests, similarly to how fold_range_test optimizes
   it on trees.  The tree code for the binary
   operation between all the operands is OPCODE.
   If OPCODE is ERROR_MARK, optimize_range_tests is called from within
   maybe_optimize_range_tests for inter-bb range optimization.
   In that case, if oe->op is NULL, oe->id is bb->index whose
   GIMPLE_COND is && or ||ed into the test, and oe->rank says
   the actual opcode.
   FIRST_BB is the first basic block if OPCODE is ERROR_MARK.  */
4091 optimize_range_tests (enum tree_code opcode
,
4092 vec
<operand_entry
*> *ops
, basic_block first_bb
)
4094 unsigned int length
= ops
->length (), i
, j
, first
;
4096 struct range_entry
*ranges
;
4097 bool any_changes
= false;
4102 ranges
= XNEWVEC (struct range_entry
, length
);
4103 for (i
= 0; i
< length
; i
++)
4107 init_range_entry (ranges
+ i
, oe
->op
,
4110 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun
, oe
->id
)));
4111 /* For | invert it now, we will invert it again before emitting
4112 the optimized expression. */
4113 if (opcode
== BIT_IOR_EXPR
4114 || (opcode
== ERROR_MARK
&& oe
->rank
== BIT_IOR_EXPR
))
4115 ranges
[i
].in_p
= !ranges
[i
].in_p
;
4118 qsort (ranges
, length
, sizeof (*ranges
), range_entry_cmp
);
4119 for (i
= 0; i
< length
; i
++)
4120 if (ranges
[i
].exp
!= NULL_TREE
&& TREE_CODE (ranges
[i
].exp
) == SSA_NAME
)
4123 /* Try to merge ranges. */
4124 for (first
= i
; i
< length
; i
++)
4126 tree low
= ranges
[i
].low
;
4127 tree high
= ranges
[i
].high
;
4128 int in_p
= ranges
[i
].in_p
;
4129 bool strict_overflow_p
= ranges
[i
].strict_overflow_p
;
4130 int update_fail_count
= 0;
4132 for (j
= i
+ 1; j
< length
; j
++)
4134 if (ranges
[i
].exp
!= ranges
[j
].exp
)
4136 if (!merge_ranges (&in_p
, &low
, &high
, in_p
, low
, high
,
4137 ranges
[j
].in_p
, ranges
[j
].low
, ranges
[j
].high
))
4139 strict_overflow_p
|= ranges
[j
].strict_overflow_p
;
4145 if (update_range_test (ranges
+ i
, ranges
+ i
+ 1, NULL
, j
- i
- 1,
4146 opcode
, ops
, ranges
[i
].exp
, NULL
, in_p
,
4147 low
, high
, strict_overflow_p
))
4152 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4153 while update_range_test would fail. */
4154 else if (update_fail_count
== 64)
4157 ++update_fail_count
;
4160 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, true,
4163 if (BRANCH_COST (optimize_function_for_speed_p (cfun
), false) >= 2)
4164 any_changes
|= optimize_range_tests_1 (opcode
, first
, length
, false,
4166 if (lshift_cheap_p (optimize_function_for_speed_p (cfun
)))
4167 any_changes
|= optimize_range_tests_to_bit_test (opcode
, first
, length
,
4169 any_changes
|= optimize_range_tests_var_bound (opcode
, first
, length
, ops
,
4171 any_changes
|= optimize_range_tests_cmp_bitwise (opcode
, first
, length
,
4174 if (any_changes
&& opcode
!= ERROR_MARK
)
4177 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4179 if (oe
->op
== error_mark_node
)
4188 XDELETEVEC (ranges
);
4192 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4193 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4194 otherwise the comparison code. TYPE is a return value that is set
4195 to type of comparison. */
4198 ovce_extract_ops (tree var
, gassign
**rets
, bool *reti
, tree
*type
,
4199 tree
*lhs
, tree
*rhs
, gassign
**vcond
)
4201 if (TREE_CODE (var
) != SSA_NAME
)
4204 gassign
*stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (var
));
4210 /* ??? If we start creating more COND_EXPR, we could perform
4211 this same optimization with them. For now, simplify. */
4212 if (gimple_assign_rhs_code (stmt
) != VEC_COND_EXPR
)
4215 tree cond
= gimple_assign_rhs1 (stmt
);
4216 tree_code cmp
= TREE_CODE (cond
);
4217 if (cmp
!= SSA_NAME
)
4220 gassign
*assign
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (cond
));
4222 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign
)) != tcc_comparison
)
4225 cmp
= gimple_assign_rhs_code (assign
);
4227 *lhs
= gimple_assign_rhs1 (assign
);
4229 *rhs
= gimple_assign_rhs2 (assign
);
4231 /* ??? For now, allow only canonical true and false result vectors.
4232 We could expand this to other constants should the need arise,
4233 but at the moment we don't create them. */
4234 tree t
= gimple_assign_rhs2 (stmt
);
4235 tree f
= gimple_assign_rhs3 (stmt
);
4237 if (integer_all_onesp (t
))
4239 else if (integer_all_onesp (f
))
4241 cmp
= invert_tree_comparison (cmp
, false);
4246 if (!integer_zerop (f
))
4255 *type
= TREE_TYPE (cond
);
4259 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4260 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
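/* For instance (illustrative): if elt0 is VEC_COND_EXPR <x <= y, {-1,...},
   {0,...}> and elt1 is VEC_COND_EXPR <x >= y, {-1,...}, {0,...}> combined
   with BIT_AND_EXPR, maybe_fold_and_comparisons can fold the two
   comparisons to x == y and a single VEC_COND_EXPR suffices.  */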
4263 optimize_vec_cond_expr (tree_code opcode
, vec
<operand_entry
*> *ops
)
4265 unsigned int length
= ops
->length (), i
, j
;
4266 bool any_changes
= false;
4271 for (i
= 0; i
< length
; ++i
)
4273 tree elt0
= (*ops
)[i
]->op
;
4275 gassign
*stmt0
, *vcond0
;
4277 tree type
, lhs0
, rhs0
;
4278 tree_code cmp0
= ovce_extract_ops (elt0
, &stmt0
, &invert
, &type
, &lhs0
,
4280 if (cmp0
== ERROR_MARK
)
4283 for (j
= i
+ 1; j
< length
; ++j
)
4285 tree
&elt1
= (*ops
)[j
]->op
;
4287 gassign
*stmt1
, *vcond1
;
4289 tree_code cmp1
= ovce_extract_ops (elt1
, &stmt1
, NULL
, NULL
, &lhs1
,
4291 if (cmp1
== ERROR_MARK
)
4295 if (opcode
== BIT_AND_EXPR
)
4296 comb
= maybe_fold_and_comparisons (type
, cmp0
, lhs0
, rhs0
,
4298 else if (opcode
== BIT_IOR_EXPR
)
4299 comb
= maybe_fold_or_comparisons (type
, cmp0
, lhs0
, rhs0
,
4307 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4309 fprintf (dump_file
, "Transforming ");
4310 print_generic_expr (dump_file
, gimple_assign_lhs (stmt0
));
4311 fprintf (dump_file
, " %c ", opcode
== BIT_AND_EXPR
? '&' : '|');
4312 print_generic_expr (dump_file
, gimple_assign_lhs (stmt1
));
4313 fprintf (dump_file
, " into ");
4314 print_generic_expr (dump_file
, comb
);
4315 fputc ('\n', dump_file
);
4318 gimple_stmt_iterator gsi
= gsi_for_stmt (vcond0
);
4319 tree exp
= force_gimple_operand_gsi (&gsi
, comb
, true, NULL_TREE
,
4320 true, GSI_SAME_STMT
);
4322 swap_ssa_operands (vcond0
, gimple_assign_rhs2_ptr (vcond0
),
4323 gimple_assign_rhs3_ptr (vcond0
));
4324 gimple_assign_set_rhs1 (vcond0
, exp
);
4325 update_stmt (vcond0
);
4327 elt1
= error_mark_node
;
4336 FOR_EACH_VEC_ELT (*ops
, i
, oe
)
4338 if (oe
->op
== error_mark_node
)
/* Return true if STMT is a cast like:

   <bb N>:
   ...
   _123 = (int) _234;

   <bb M>:
   # _345 = PHI <_123(N), 1(...), 1(...)>

   where _234 has bool type, _123 has single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.

   Also return true if STMT is tcc_comparison like:

   <bb N>:
   ...
   _234 = a_2(D) == 2;

   <bb M>:
   # _345 = PHI <_234(N), 1(...), 1(...)>
   _346 = (int) _345;

   where _234 has bool type, single use and
   bb N has a single successor M.  This is commonly used in
   the last block of a range test.  */
4374 final_range_test_p (gimple
*stmt
)
4376 basic_block bb
, rhs_bb
, lhs_bb
;
4379 use_operand_p use_p
;
4382 if (!gimple_assign_cast_p (stmt
)
4383 && (!is_gimple_assign (stmt
)
4384 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4385 != tcc_comparison
)))
4387 bb
= gimple_bb (stmt
);
4388 if (!single_succ_p (bb
))
4390 e
= single_succ_edge (bb
);
4391 if (e
->flags
& EDGE_COMPLEX
)
4394 lhs
= gimple_assign_lhs (stmt
);
4395 rhs
= gimple_assign_rhs1 (stmt
);
4396 if (gimple_assign_cast_p (stmt
)
4397 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4398 || TREE_CODE (rhs
) != SSA_NAME
4399 || TREE_CODE (TREE_TYPE (rhs
)) != BOOLEAN_TYPE
))
4402 if (!gimple_assign_cast_p (stmt
)
4403 && (TREE_CODE (TREE_TYPE (lhs
)) != BOOLEAN_TYPE
))
4406 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4407 if (!single_imm_use (lhs
, &use_p
, &use_stmt
))
4410 if (gimple_code (use_stmt
) != GIMPLE_PHI
4411 || gimple_bb (use_stmt
) != e
->dest
)
4414 /* And that the rhs is defined in the same loop. */
4415 if (gimple_assign_cast_p (stmt
))
4417 if (TREE_CODE (rhs
) != SSA_NAME
4418 || !(rhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
4419 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), rhs_bb
))
4424 if (TREE_CODE (lhs
) != SSA_NAME
4425 || !(lhs_bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
4426 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt
), lhs_bb
))
/* Return true if BB is a suitable basic block for inter-bb range test
   optimization.  If BACKWARD is true, BB should be the only predecessor
   of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine,
   or compared against to find a common basic block to which all conditions
   branch if true resp. false.  If BACKWARD is false, TEST_BB should
   be the only predecessor of BB.  *TEST_SWAPPED_P is set to true if
   TEST_BB is a bb ending in a condition where the edge to the non-*OTHER_BB
   block points to an empty block that falls through into *OTHER_BB and
   the phi args match that path.  */
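/* Illustrative CFG shape only (block numbers are made up):

     <bb 2>: if (a_1 < 10) goto <bb 4>; else goto <bb 3>;
     <bb 3>: if (a_1 > 20) goto <bb 4>; else goto <bb 5>;

   With BACKWARD true and TEST_BB == bb 3, bb 2 is a suitable predecessor
   and bb 4 becomes *OTHER_BB, the common destination both conditions
   branch to.  */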
4444 suitable_cond_bb (basic_block bb
, basic_block test_bb
, basic_block
*other_bb
,
4445 bool *test_swapped_p
, bool backward
)
4447 edge_iterator ei
, ei2
;
4451 bool other_edge_seen
= false;
4456 /* Check last stmt first. */
4457 stmt
= last_nondebug_stmt (bb
);
4459 || (gimple_code (stmt
) != GIMPLE_COND
4460 && (backward
|| !final_range_test_p (stmt
)))
4461 || gimple_visited_p (stmt
)
4462 || stmt_could_throw_p (cfun
, stmt
)
4465 is_cond
= gimple_code (stmt
) == GIMPLE_COND
;
4468 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4469 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4470 to *OTHER_BB (if not set yet, try to find it out). */
4471 if (EDGE_COUNT (bb
->succs
) != 2)
4473 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
4475 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4477 if (e
->dest
== test_bb
)
4486 if (*other_bb
== NULL
)
4488 FOR_EACH_EDGE (e2
, ei2
, test_bb
->succs
)
4489 if (!(e2
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
)))
4491 else if (e
->dest
== e2
->dest
)
4492 *other_bb
= e
->dest
;
4493 if (*other_bb
== NULL
)
4496 if (e
->dest
== *other_bb
)
4497 other_edge_seen
= true;
4501 if (*other_bb
== NULL
|| !other_edge_seen
)
4504 else if (single_succ (bb
) != *other_bb
)
4507 /* Now check all PHIs of *OTHER_BB. */
4508 e
= find_edge (bb
, *other_bb
);
4509 e2
= find_edge (test_bb
, *other_bb
);
4511 for (gsi
= gsi_start_phis (e
->dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4513 gphi
*phi
= gsi
.phi ();
4514 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4515 corresponding to BB and TEST_BB predecessor must be the same. */
4516 if (!operand_equal_p (gimple_phi_arg_def (phi
, e
->dest_idx
),
4517 gimple_phi_arg_def (phi
, e2
->dest_idx
), 0))
	 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
	    one of the PHIs should have the lhs of the last stmt in
	    that block as PHI arg and that PHI should have 0 or 1
	    corresponding to it in all other range test basic blocks.  */
4526 if (gimple_phi_arg_def (phi
, e
->dest_idx
)
4527 == gimple_assign_lhs (stmt
)
4528 && (integer_zerop (gimple_phi_arg_def (phi
, e2
->dest_idx
))
4529 || integer_onep (gimple_phi_arg_def (phi
,
4535 gimple
*test_last
= last_nondebug_stmt (test_bb
);
4536 if (gimple_code (test_last
) == GIMPLE_COND
)
4538 if (backward
? e2
->src
!= test_bb
: e
->src
!= bb
)
4541 /* For last_bb, handle also:
4543 goto <bb 6>; [34.00%]
4545 goto <bb 7>; [66.00%]
4547 <bb 6> [local count: 79512730]:
4549 <bb 7> [local count: 1073741824]:
4550 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4551 where bb 7 is *OTHER_BB, but the PHI values from the
4552 earlier bbs match the path through the empty bb
4556 e3
= EDGE_SUCC (test_bb
,
4557 e2
== EDGE_SUCC (test_bb
, 0) ? 1 : 0);
4560 e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4561 if (empty_block_p (e3
->dest
)
4562 && single_succ_p (e3
->dest
)
4563 && single_succ (e3
->dest
) == *other_bb
4564 && single_pred_p (e3
->dest
)
4565 && single_succ_edge (e3
->dest
)->flags
== EDGE_FALLTHRU
)
4568 e2
= single_succ_edge (e3
->dest
);
4570 e
= single_succ_edge (e3
->dest
);
4572 *test_swapped_p
= true;
4576 else if (gimple_phi_arg_def (phi
, e2
->dest_idx
)
4577 == gimple_assign_lhs (test_last
)
4578 && (integer_zerop (gimple_phi_arg_def (phi
,
4580 || integer_onep (gimple_phi_arg_def (phi
,
/* Return true if BB doesn't have side-effects that would disallow
   range test optimization, all SSA_NAMEs set in the bb are consumed
   in the bb and there are no PHIs.  */
4596 no_side_effect_bb (basic_block bb
)
4598 gimple_stmt_iterator gsi
;
4601 if (!gimple_seq_empty_p (phi_nodes (bb
)))
4603 last
= last_nondebug_stmt (bb
);
4604 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
4606 gimple
*stmt
= gsi_stmt (gsi
);
4608 imm_use_iterator imm_iter
;
4609 use_operand_p use_p
;
4611 if (is_gimple_debug (stmt
))
4613 if (gimple_has_side_effects (stmt
))
4617 if (!is_gimple_assign (stmt
))
4619 lhs
= gimple_assign_lhs (stmt
);
4620 if (TREE_CODE (lhs
) != SSA_NAME
)
4622 if (gimple_assign_rhs_could_trap_p (stmt
))
4624 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
4626 gimple
*use_stmt
= USE_STMT (use_p
);
4627 if (is_gimple_debug (use_stmt
))
4629 if (gimple_bb (use_stmt
) != bb
)
4636 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4637 return true and fill in *OPS recursively. */
4640 get_ops (tree var
, enum tree_code code
, vec
<operand_entry
*> *ops
,
4643 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4647 if (!is_reassociable_op (stmt
, code
, loop
))
4650 rhs
[0] = gimple_assign_rhs1 (stmt
);
4651 rhs
[1] = gimple_assign_rhs2 (stmt
);
4652 gimple_set_visited (stmt
, true);
4653 for (i
= 0; i
< 2; i
++)
4654 if (TREE_CODE (rhs
[i
]) == SSA_NAME
4655 && !get_ops (rhs
[i
], code
, ops
, loop
)
4656 && has_single_use (rhs
[i
]))
4658 operand_entry
*oe
= operand_entry_pool
.allocate ();
4664 oe
->stmt_to_insert
= NULL
;
4665 ops
->safe_push (oe
);
/* Find the ops that were added by get_ops starting from VAR, see if
   they were changed during update_range_test and if yes, create new
   stmts.  */
4675 update_ops (tree var
, enum tree_code code
, const vec
<operand_entry
*> &ops
,
4676 unsigned int *pidx
, class loop
*loop
)
4678 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
4682 if (!is_reassociable_op (stmt
, code
, loop
))
4685 rhs
[0] = gimple_assign_rhs1 (stmt
);
4686 rhs
[1] = gimple_assign_rhs2 (stmt
);
4689 for (i
= 0; i
< 2; i
++)
4690 if (TREE_CODE (rhs
[i
]) == SSA_NAME
)
4692 rhs
[2 + i
] = update_ops (rhs
[i
], code
, ops
, pidx
, loop
);
4693 if (rhs
[2 + i
] == NULL_TREE
)
4695 if (has_single_use (rhs
[i
]))
4696 rhs
[2 + i
] = ops
[(*pidx
)++]->op
;
4698 rhs
[2 + i
] = rhs
[i
];
4701 if ((rhs
[2] != rhs
[0] || rhs
[3] != rhs
[1])
4702 && (rhs
[2] != rhs
[1] || rhs
[3] != rhs
[0]))
4704 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
4705 var
= make_ssa_name (TREE_TYPE (var
));
4706 gassign
*g
= gimple_build_assign (var
, gimple_assign_rhs_code (stmt
),
4708 gimple_set_uid (g
, gimple_uid (stmt
));
4709 gimple_set_visited (g
, true);
4710 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
4711 gimple_stmt_iterator gsi2
= gsi_for_stmt (g
);
4712 if (fold_stmt_inplace (&gsi2
))
4718 /* Structure to track the initial value passed to get_ops and
4719 the range in the ops vector for each basic block. */
4721 struct inter_bb_range_test_entry
4724 unsigned int first_idx
, last_idx
;
/* Inter-bb range test optimization.

   Returns TRUE if a gimple conditional is optimized to a true/false,
   otherwise returns FALSE.

   This indicates to the caller that it should run a CFG cleanup pass
   once reassociation is completed.  */
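/* Sketch of the kind of transformation performed here (variable names
   are illustrative, not from the sources):

     if (x_2 == 3) goto L; else goto next;
     next: if (x_2 == 5) goto L; else goto out;

   Conditions spread over several basic blocks that all branch to the
   same destination are gathered into one operand list and handed to
   optimize_range_tests, which can e.g. fold the two tests above into a
   single range or bit test on x_2.  */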
4736 maybe_optimize_range_tests (gimple
*stmt
)
4738 basic_block first_bb
= gimple_bb (stmt
);
4739 basic_block last_bb
= first_bb
;
4740 basic_block other_bb
= NULL
;
4744 auto_vec
<operand_entry
*> ops
;
4745 auto_vec
<inter_bb_range_test_entry
> bbinfo
;
4746 bool any_changes
= false;
4747 bool cfg_cleanup_needed
= false;
4749 /* Consider only basic blocks that end with GIMPLE_COND or
4750 a cast statement satisfying final_range_test_p. All
4751 but the last bb in the first_bb .. last_bb range
4752 should end with GIMPLE_COND. */
4753 if (gimple_code (stmt
) == GIMPLE_COND
)
4755 if (EDGE_COUNT (first_bb
->succs
) != 2)
4756 return cfg_cleanup_needed
;
4758 else if (final_range_test_p (stmt
))
4759 other_bb
= single_succ (first_bb
);
4761 return cfg_cleanup_needed
;
4763 if (stmt_could_throw_p (cfun
, stmt
))
4764 return cfg_cleanup_needed
;
4766 /* As relative ordering of post-dominator sons isn't fixed,
4767 maybe_optimize_range_tests can be called first on any
4768 bb in the range we want to optimize. So, start searching
4769 backwards, if first_bb can be set to a predecessor. */
4770 while (single_pred_p (first_bb
))
4772 basic_block pred_bb
= single_pred (first_bb
);
4773 if (!suitable_cond_bb (pred_bb
, first_bb
, &other_bb
, NULL
, true))
4775 if (!no_side_effect_bb (first_bb
))
4779 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4780 Before starting forward search in last_bb successors, find
4781 out the other_bb. */
4782 if (first_bb
== last_bb
)
4785 /* As non-GIMPLE_COND last stmt always terminates the range,
4786 if forward search didn't discover anything, just give up. */
4787 if (gimple_code (stmt
) != GIMPLE_COND
)
4788 return cfg_cleanup_needed
;
4789 /* Look at both successors. Either it ends with a GIMPLE_COND
4790 and satisfies suitable_cond_bb, or ends with a cast and
4791 other_bb is that cast's successor. */
4792 FOR_EACH_EDGE (e
, ei
, first_bb
->succs
)
4793 if (!(e
->flags
& (EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
))
4794 || e
->dest
== first_bb
)
4795 return cfg_cleanup_needed
;
4796 else if (single_pred_p (e
->dest
))
4798 stmt
= last_nondebug_stmt (e
->dest
);
4800 && gimple_code (stmt
) == GIMPLE_COND
4801 && EDGE_COUNT (e
->dest
->succs
) == 2)
4803 if (suitable_cond_bb (first_bb
, e
->dest
, &other_bb
,
4810 && final_range_test_p (stmt
)
4811 && find_edge (first_bb
, single_succ (e
->dest
)))
4813 other_bb
= single_succ (e
->dest
);
4814 if (other_bb
== first_bb
)
4818 if (other_bb
== NULL
)
4819 return cfg_cleanup_needed
;
4821 /* Now do the forward search, moving last_bb to successor bbs
4822 that aren't other_bb. */
4823 while (EDGE_COUNT (last_bb
->succs
) == 2)
4825 FOR_EACH_EDGE (e
, ei
, last_bb
->succs
)
4826 if (e
->dest
!= other_bb
)
4830 if (!single_pred_p (e
->dest
))
4832 if (!suitable_cond_bb (e
->dest
, last_bb
, &other_bb
, NULL
, false))
4834 if (!no_side_effect_bb (e
->dest
))
4838 if (first_bb
== last_bb
)
4839 return cfg_cleanup_needed
;
4840 /* Here basic blocks first_bb through last_bb's predecessor
4841 end with GIMPLE_COND, all of them have one of the edges to
4842 other_bb and another to another block in the range,
4843 all blocks except first_bb don't have side-effects and
4844 last_bb ends with either GIMPLE_COND, or cast satisfying
4845 final_range_test_p. */
4846 for (bb
= last_bb
; ; bb
= single_pred (bb
))
4848 enum tree_code code
;
4850 inter_bb_range_test_entry bb_ent
;
4852 bb_ent
.op
= NULL_TREE
;
4853 bb_ent
.first_idx
= ops
.length ();
4854 bb_ent
.last_idx
= bb_ent
.first_idx
;
4855 e
= find_edge (bb
, other_bb
);
4856 stmt
= last_nondebug_stmt (bb
);
4857 gimple_set_visited (stmt
, true);
4858 if (gimple_code (stmt
) != GIMPLE_COND
)
4860 use_operand_p use_p
;
4865 lhs
= gimple_assign_lhs (stmt
);
4866 rhs
= gimple_assign_rhs1 (stmt
);
4867 gcc_assert (bb
== last_bb
);
4876 # _345 = PHI <_123(N), 1(...), 1(...)>
4878 or 0 instead of 1. If it is 0, the _234
4879 range test is anded together with all the
4880 other range tests, if it is 1, it is ored with
4882 single_imm_use (lhs
, &use_p
, &phi
);
4883 gcc_assert (gimple_code (phi
) == GIMPLE_PHI
);
4884 e2
= find_edge (first_bb
, other_bb
);
4886 gcc_assert (gimple_phi_arg_def (phi
, e
->dest_idx
) == lhs
);
4887 if (integer_zerop (gimple_phi_arg_def (phi
, d
)))
4888 code
= BIT_AND_EXPR
;
4891 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi
, d
)));
4892 code
= BIT_IOR_EXPR
;
4895 /* If _234 SSA_NAME_DEF_STMT is
4897 (or &, corresponding to 1/0 in the phi arguments,
4898 push into ops the individual range test arguments
4899 of the bitwise or resp. and, recursively. */
4900 if (TREE_CODE (rhs
) == SSA_NAME
4901 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4903 && !get_ops (rhs
, code
, &ops
,
4904 loop_containing_stmt (stmt
))
4905 && has_single_use (rhs
))
4907 /* Otherwise, push the _234 range test itself. */
4908 operand_entry
*oe
= operand_entry_pool
.allocate ();
4914 oe
->stmt_to_insert
= NULL
;
4919 else if (is_gimple_assign (stmt
)
4920 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
))
4922 && !get_ops (lhs
, code
, &ops
,
4923 loop_containing_stmt (stmt
))
4924 && has_single_use (lhs
))
4926 operand_entry
*oe
= operand_entry_pool
.allocate ();
4937 bb_ent
.last_idx
= ops
.length ();
4940 bbinfo
.safe_push (bb_ent
);
4941 for (unsigned int i
= bb_ent
.first_idx
; i
< bb_ent
.last_idx
; ++i
)
4942 ops
[i
]->id
= bb
->index
;
4945 else if (bb
== last_bb
)
4947 /* For last_bb, handle also:
4949 goto <bb 6>; [34.00%]
4951 goto <bb 7>; [66.00%]
4953 <bb 6> [local count: 79512730]:
4955 <bb 7> [local count: 1073741824]:
4956 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4957 where bb 7 is OTHER_BB, but the PHI values from the
4958 earlier bbs match the path through the empty bb
4960 bool test_swapped_p
= false;
4961 bool ok
= suitable_cond_bb (single_pred (last_bb
), last_bb
,
4962 &other_bb
, &test_swapped_p
, true);
4965 e
= EDGE_SUCC (bb
, e
== EDGE_SUCC (bb
, 0) ? 1 : 0);
4967 /* Otherwise stmt is GIMPLE_COND. */
4968 code
= gimple_cond_code (stmt
);
4969 lhs
= gimple_cond_lhs (stmt
);
4970 rhs
= gimple_cond_rhs (stmt
);
4971 if (TREE_CODE (lhs
) == SSA_NAME
4972 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
4973 && ((code
!= EQ_EXPR
&& code
!= NE_EXPR
)
4974 || rhs
!= boolean_false_node
4975 /* Either push into ops the individual bitwise
4976 or resp. and operands, depending on which
4977 edge is other_bb. */
4978 || !get_ops (lhs
, (((e
->flags
& EDGE_TRUE_VALUE
) == 0)
4979 ^ (code
== EQ_EXPR
))
4980 ? BIT_AND_EXPR
: BIT_IOR_EXPR
, &ops
,
4981 loop_containing_stmt (stmt
))))
4983 /* Or push the GIMPLE_COND stmt itself. */
4984 operand_entry
*oe
= operand_entry_pool
.allocate ();
4987 oe
->rank
= (e
->flags
& EDGE_TRUE_VALUE
)
4988 ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
	      /* oe->op = NULL signals that there is no SSA_NAME
		 for the range test, and oe->id instead is the
		 basic block number, at whose end the GIMPLE_COND
		 is.  */
4995 oe
->stmt_to_insert
= NULL
;
5000 else if (ops
.length () > bb_ent
.first_idx
)
5003 bb_ent
.last_idx
= ops
.length ();
5005 bbinfo
.safe_push (bb_ent
);
5006 for (unsigned int i
= bb_ent
.first_idx
; i
< bb_ent
.last_idx
; ++i
)
5007 ops
[i
]->id
= bb
->index
;
5011 if (ops
.length () > 1)
5012 any_changes
= optimize_range_tests (ERROR_MARK
, &ops
, first_bb
);
5015 unsigned int idx
, max_idx
= 0;
      /* update_ops relies on has_single_use predicates returning the
	 same values as they did during get_ops earlier.  Additionally it
	 never removes statements, only adds new ones and it should walk
	 from the single imm use and check the predicate already before
	 making those changes.
	 On the other hand, the handling of GIMPLE_COND directly can turn
	 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
	 it needs to be done in a separate loop afterwards.  */
5024 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
5026 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
5027 && bbinfo
[idx
].op
!= NULL_TREE
)
5032 stmt
= last_nondebug_stmt (bb
);
5033 new_op
= update_ops (bbinfo
[idx
].op
,
5035 ops
[bbinfo
[idx
].first_idx
]->rank
,
5036 ops
, &bbinfo
[idx
].first_idx
,
5037 loop_containing_stmt (stmt
));
5038 if (new_op
== NULL_TREE
)
5040 gcc_assert (bb
== last_bb
);
5041 new_op
= ops
[bbinfo
[idx
].first_idx
++]->op
;
5043 if (bbinfo
[idx
].op
!= new_op
)
5045 imm_use_iterator iter
;
5046 use_operand_p use_p
;
5047 gimple
*use_stmt
, *cast_or_tcc_cmp_stmt
= NULL
;
5049 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, bbinfo
[idx
].op
)
5050 if (is_gimple_debug (use_stmt
))
5052 else if (gimple_code (use_stmt
) == GIMPLE_COND
5053 || gimple_code (use_stmt
) == GIMPLE_PHI
)
5054 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
5055 SET_USE (use_p
, new_op
);
5056 else if ((is_gimple_assign (use_stmt
)
5058 (gimple_assign_rhs_code (use_stmt
))
5059 == tcc_comparison
)))
5060 cast_or_tcc_cmp_stmt
= use_stmt
;
5061 else if (gimple_assign_cast_p (use_stmt
))
5062 cast_or_tcc_cmp_stmt
= use_stmt
;
5066 if (cast_or_tcc_cmp_stmt
)
5068 gcc_assert (bb
== last_bb
);
5069 tree lhs
= gimple_assign_lhs (cast_or_tcc_cmp_stmt
);
5070 tree new_lhs
= make_ssa_name (TREE_TYPE (lhs
));
5071 enum tree_code rhs_code
5072 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt
)
5073 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt
)
5076 if (is_gimple_min_invariant (new_op
))
5078 new_op
= fold_convert (TREE_TYPE (lhs
), new_op
);
5079 g
= gimple_build_assign (new_lhs
, new_op
);
5082 g
= gimple_build_assign (new_lhs
, rhs_code
, new_op
);
5083 gimple_stmt_iterator gsi
5084 = gsi_for_stmt (cast_or_tcc_cmp_stmt
);
5085 gimple_set_uid (g
, gimple_uid (cast_or_tcc_cmp_stmt
));
5086 gimple_set_visited (g
, true);
5087 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5088 FOR_EACH_IMM_USE_STMT (use_stmt
, iter
, lhs
)
5089 if (is_gimple_debug (use_stmt
))
5091 else if (gimple_code (use_stmt
) == GIMPLE_COND
5092 || gimple_code (use_stmt
) == GIMPLE_PHI
)
5093 FOR_EACH_IMM_USE_ON_STMT (use_p
, iter
)
5094 SET_USE (use_p
, new_lhs
);
5103 for (bb
= last_bb
, idx
= 0; ; bb
= single_pred (bb
), idx
++)
5105 if (bbinfo
[idx
].first_idx
< bbinfo
[idx
].last_idx
5106 && bbinfo
[idx
].op
== NULL_TREE
5107 && ops
[bbinfo
[idx
].first_idx
]->op
!= NULL_TREE
)
5109 gcond
*cond_stmt
= as_a
<gcond
*> (*gsi_last_bb (bb
));
5114 /* If we collapse the conditional to a true/false
5115 condition, then bubble that knowledge up to our caller. */
5116 if (integer_zerop (ops
[bbinfo
[idx
].first_idx
]->op
))
5118 gimple_cond_make_false (cond_stmt
);
5119 cfg_cleanup_needed
= true;
5121 else if (integer_onep (ops
[bbinfo
[idx
].first_idx
]->op
))
5123 gimple_cond_make_true (cond_stmt
);
5124 cfg_cleanup_needed
= true;
5128 gimple_cond_set_code (cond_stmt
, NE_EXPR
);
5129 gimple_cond_set_lhs (cond_stmt
,
5130 ops
[bbinfo
[idx
].first_idx
]->op
);
5131 gimple_cond_set_rhs (cond_stmt
, boolean_false_node
);
5133 update_stmt (cond_stmt
);
      /* The above changes could result in basic blocks after the first
	 modified one, up to and including last_bb, being executed even if
	 they would not be in the original program.  If the value ranges of
	 assignment lhs' in those bbs were dependent on the conditions
	 guarding those basic blocks which now can change, the VRs might
	 be incorrect.  As no_side_effect_bb should ensure those SSA_NAMEs
	 are only used within the same bb, it should not be a big deal if
	 we just reset all the VRs in those bbs.  See PR68671.  */
5147 for (bb
= last_bb
, idx
= 0; idx
< max_idx
; bb
= single_pred (bb
), idx
++)
5148 reset_flow_sensitive_info_in_bb (bb
);
5150 return cfg_cleanup_needed
;
/* Remove the def stmt of VAR if VAR has zero uses, and if so recurse
   on its rhs1 operand.  */
5157 remove_visited_stmt_chain (tree var
)
5160 gimple_stmt_iterator gsi
;
5164 if (TREE_CODE (var
) != SSA_NAME
|| !has_zero_uses (var
))
5166 stmt
= SSA_NAME_DEF_STMT (var
);
5167 if (is_gimple_assign (stmt
) && gimple_visited_p (stmt
))
5169 var
= gimple_assign_rhs1 (stmt
);
5170 gsi
= gsi_for_stmt (stmt
);
5171 reassoc_remove_stmt (&gsi
);
5172 release_defs (stmt
);
/* This function checks three consecutive operands in
   passed operands vector OPS starting from OPINDEX and
   swaps two operands if it is profitable for binary operation
   consuming OPINDEX + 1 and OPINDEX + 2 operands.

   We pair ops with the same rank if possible.  */
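/* Worked example (illustrative only): if the three operands at
   OPINDEX, OPINDEX + 1 and OPINDEX + 2 have ranks 1, 1 and 2, the first
   and third operands are swapped, so the two rank-1 operands end up at
   OPINDEX + 1 and OPINDEX + 2 and get paired by the same binary
   statement.  */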
static void
swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
			  unsigned int opindex)
{
  operand_entry *oe1, *oe2, *oe3;

  oe1 = ops[opindex];
  oe2 = ops[opindex + 1];
  oe3 = ops[opindex + 2];

  if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
    std::swap (*oe1, *oe3);
  else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
    std::swap (*oe1, *oe2);
}
/* If definition of RHS1 or RHS2 dominates STMT, return the later of those
   two definitions, otherwise return STMT.  Sets INSERT_BEFORE to indicate
   whether RHS1 op RHS2 can be inserted before or needs to be inserted
   after the returned stmt.  */
static inline gimple *
find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
{
  insert_before = true;
  if (TREE_CODE (rhs1) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
    {
      stmt = SSA_NAME_DEF_STMT (rhs1);
      insert_before = false;
    }
  if (TREE_CODE (rhs2) == SSA_NAME
      && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
    {
      stmt = SSA_NAME_DEF_STMT (rhs2);
      insert_before = false;
    }
  return stmt;
}
/* If the stmt that defines operand has to be inserted, insert it
   before the use.  */
5229 insert_stmt_before_use (gimple
*stmt
, gimple
*stmt_to_insert
)
5231 gcc_assert (is_gimple_assign (stmt_to_insert
));
5232 tree rhs1
= gimple_assign_rhs1 (stmt_to_insert
);
5233 tree rhs2
= gimple_assign_rhs2 (stmt_to_insert
);
5235 gimple
*insert_point
= find_insert_point (stmt
, rhs1
, rhs2
, insert_before
);
5236 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_point
);
5237 gimple_set_uid (stmt_to_insert
, gimple_uid (insert_point
));
5239 /* If the insert point is not stmt, then insert_point would be
5240 the point where operand rhs1 or rhs2 is defined. In this case,
5241 stmt_to_insert has to be inserted afterwards. This would
5242 only happen when the stmt insertion point is flexible. */
5244 gsi_insert_before (&gsi
, stmt_to_insert
, GSI_NEW_STMT
);
5246 insert_stmt_after (stmt_to_insert
, insert_point
);
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  Return new lhs.
   CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
   the current stmt and during recursive invocations.
   NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
   recursive invocations.  */
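/* For illustration only (made-up SSA names): given the linearized chain

     t1_1 = a_2 + b_3;
     t2_4 = t1_1 + c_5;
     t3_6 = t2_4 + d_7;

   this routine walks the chain recursively and replaces the leaf
   operands with the entries from OPS, so that after the rewrite the
   leaves appear in the rank order chosen earlier, creating fresh
   SSA_NAMEs where CHANGED requires it so that debug info stays
   consistent.  */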
5259 rewrite_expr_tree (gimple
*stmt
, enum tree_code rhs_code
, unsigned int opindex
,
5260 const vec
<operand_entry
*> &ops
, bool changed
,
5263 tree rhs1
= gimple_assign_rhs1 (stmt
);
5264 tree rhs2
= gimple_assign_rhs2 (stmt
);
5265 tree lhs
= gimple_assign_lhs (stmt
);
5268 /* The final recursion case for this function is that you have
5269 exactly two operations left.
5270 If we had exactly one op in the entire list to start with, we
5271 would have never called this function, and the tail recursion
5272 rewrites them one at a time. */
5273 if (opindex
+ 2 == ops
.length ())
5275 operand_entry
*oe1
, *oe2
;
5278 oe2
= ops
[opindex
+ 1];
5280 if (rhs1
!= oe1
->op
|| rhs2
!= oe2
->op
)
5282 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5283 unsigned int uid
= gimple_uid (stmt
);
5285 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5287 fprintf (dump_file
, "Transforming ");
5288 print_gimple_stmt (dump_file
, stmt
, 0);
	  /* If the stmt that defines operand has to be inserted, insert it
	     before the use.  */
5293 if (oe1
->stmt_to_insert
)
5294 insert_stmt_before_use (stmt
, oe1
->stmt_to_insert
);
5295 if (oe2
->stmt_to_insert
)
5296 insert_stmt_before_use (stmt
, oe2
->stmt_to_insert
);
5297 /* Even when changed is false, reassociation could have e.g. removed
5298 some redundant operations, so unless we are just swapping the
5299 arguments or unless there is no change at all (then we just
5300 return lhs), force creation of a new SSA_NAME. */
5301 if (changed
|| ((rhs1
!= oe2
->op
|| rhs2
!= oe1
->op
) && opindex
))
5304 gimple
*insert_point
5305 = find_insert_point (stmt
, oe1
->op
, oe2
->op
, insert_before
);
5306 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5308 = gimple_build_assign (lhs
, rhs_code
,
5310 gimple_set_uid (stmt
, uid
);
5311 gimple_set_visited (stmt
, true);
5313 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5315 insert_stmt_after (stmt
, insert_point
);
5320 gcc_checking_assert (find_insert_point (stmt
, oe1
->op
, oe2
->op
,
5323 gimple_assign_set_rhs1 (stmt
, oe1
->op
);
5324 gimple_assign_set_rhs2 (stmt
, oe2
->op
);
5328 if (rhs1
!= oe1
->op
&& rhs1
!= oe2
->op
)
5329 remove_visited_stmt_chain (rhs1
);
5331 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5333 fprintf (dump_file
, " into ");
5334 print_gimple_stmt (dump_file
, stmt
, 0);
5340 /* If we hit here, we should have 3 or more ops left. */
5341 gcc_assert (opindex
+ 2 < ops
.length ());
5343 /* Rewrite the next operator. */
  /* If the stmt that defines operand has to be inserted, insert it
     before the use.  */
5348 if (oe
->stmt_to_insert
)
5349 insert_stmt_before_use (stmt
, oe
->stmt_to_insert
);
5351 /* Recurse on the LHS of the binary operator, which is guaranteed to
5352 be the non-leaf side. */
5354 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1
), rhs_code
, opindex
+ 1, ops
,
5355 changed
|| oe
->op
!= rhs2
|| next_changed
,
5358 if (oe
->op
!= rhs2
|| new_rhs1
!= rhs1
)
5360 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5362 fprintf (dump_file
, "Transforming ");
5363 print_gimple_stmt (dump_file
, stmt
, 0);
5366 /* If changed is false, this is either opindex == 0
5367 or all outer rhs2's were equal to corresponding oe->op,
5368 and powi_result is NULL.
5369 That means lhs is equivalent before and after reassociation.
5370 Otherwise ensure the old lhs SSA_NAME is not reused and
5371 create a new stmt as well, so that any debug stmts will be
5372 properly adjusted. */
5375 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
5376 unsigned int uid
= gimple_uid (stmt
);
5378 gimple
*insert_point
= find_insert_point (stmt
, new_rhs1
, oe
->op
,
5381 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5382 stmt
= gimple_build_assign (lhs
, rhs_code
,
5384 gimple_set_uid (stmt
, uid
);
5385 gimple_set_visited (stmt
, true);
5387 gsi_insert_before (&gsi
, stmt
, GSI_SAME_STMT
);
5389 insert_stmt_after (stmt
, insert_point
);
5394 gcc_checking_assert (find_insert_point (stmt
, new_rhs1
, oe
->op
,
5397 gimple_assign_set_rhs1 (stmt
, new_rhs1
);
5398 gimple_assign_set_rhs2 (stmt
, oe
->op
);
5402 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5404 fprintf (dump_file
, " into ");
5405 print_gimple_stmt (dump_file
, stmt
, 0);
/* Find out how many cycles we need to compute the statement chain.
   OPS_NUM holds the number of statements in a chain.  CPU_WIDTH is the
   maximum number of independent statements we may execute per cycle.  */
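/* Worked example (illustrative): with OPS_NUM = 10 and CPU_WIDTH = 2,
   the first phase accounts for 10 / (2 * 2) = 2 cycles, leaving
   10 - 2 * 2 = 6 operands; 6 is not a power of two, so the remainder
   costs floor_log2 (6) + 1 = 3 more cycles, for 5 cycles in total.  */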
static int
get_required_cycles (int ops_num, int cpu_width)
{
  int res;
  int elog;
  unsigned int rest;

  /* While we have more than 2 * cpu_width operands
     we may reduce the number of operands by cpu_width
     per cycle.  */
  res = ops_num / (2 * cpu_width);

  /* The remaining operand count may be reduced twice per cycle
     until we have only one operand.  */
  rest = (unsigned)(ops_num - res * cpu_width);
  elog = exact_log2 (rest);
  if (elog >= 0)
    res += elog;
  else
    res += floor_log2 (rest) + 1;

  return res;
}
/* Given that the target fully pipelines FMA instructions, return the latency
   of MULT_EXPRs that can't be hidden by the FMAs.  WIDTH is the number of
   pipes.  */

static int
get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
{
  gcc_checking_assert (mult_num && mult_num <= ops_num);

  /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
     E.g.:

	_1 = A * B;
	_2 = .FMA (C, D, _1);

     Otherwise there's latency(MULT)*1 in the first FMA.  */
  return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
}
/* Returns an optimal number of registers to use for computation of
   given statements.

   LHS is the result ssa name of OPS.  MULT_NUM is the number of
   sub-expressions that are MULT_EXPRs, when OPS are PLUS_EXPRs or
   MINUS_EXPRs.  */
5467 get_reassociation_width (vec
<operand_entry
*> *ops
, int mult_num
, tree lhs
,
5468 enum tree_code opc
, machine_mode mode
)
5470 int param_width
= param_tree_reassoc_width
;
5474 int ops_num
= ops
->length ();
5476 if (param_width
> 0)
5477 width
= param_width
;
5479 width
= targetm
.sched
.reassociation_width (opc
, mode
);
5484 /* Get the minimal time required for sequence computation. */
5485 cycles_best
= get_required_cycles (ops_num
, width
);
5487 /* Check if we may use less width and still compute sequence for
5488 the same time. It will allow us to reduce registers usage.
5489 get_required_cycles is monotonically increasing with lower width
5490 so we can perform a binary search for the minimal width that still
5491 results in the optimal cycle count. */
5494 /* If the target fully pipelines FMA instruction, the multiply part can start
5495 already if its operands are ready. Assuming symmetric pipes are used for
5496 FMUL/FADD/FMA, then for a sequence of FMA like:
5498 _8 = .FMA (_2, _3, _1);
5499 _9 = .FMA (_5, _4, _8);
5500 _10 = .FMA (_7, _6, _9);
5502 , if width=1, the latency is latency(MULT) + latency(ADD)*3.
5506 _9 = .FMA (_2, _3, _1);
5507 _10 = .FMA (_6, _7, _8);
5510 , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >=
5511 latency(ADD), the first variant is preferred.
5513 Find out if we can get a smaller width considering FMA.
5514 Assume FMUL and FMA use the same units that can also do FADD.
5515 For other scenarios, such as when FMUL and FADD are using separated units,
5516 the following code may not apply. */
5518 int width_mult
= targetm
.sched
.reassociation_width (MULT_EXPR
, mode
);
5519 if (width
> 1 && mult_num
&& param_fully_pipelined_fma
5520 && width_mult
<= width
)
5522 /* Latency of MULT_EXPRs. */
5524 = get_mult_latency_consider_fma (ops_num
, mult_num
, width_mult
);
5526 /* Quick search might not apply. So start from 1. */
5527 for (int i
= 1; i
< width_mult
; i
++)
5530 = get_mult_latency_consider_fma (ops_num
, mult_num
, i
);
5531 int lat_add_new
= get_required_cycles (ops_num
, i
);
5533 /* Assume latency(MULT) >= latency(ADD). */
5534 if (lat_mul
- lat_mul_new
>= lat_add_new
- cycles_best
)
5543 while (width
> width_min
)
5545 int width_mid
= (width
+ width_min
) / 2;
5547 if (get_required_cycles (ops_num
, width_mid
) == cycles_best
)
5549 else if (width_min
< width_mid
)
5550 width_min
= width_mid
;
  /* If there's a loop-dependent FMA result, return width=2 to avoid it.  This
     is better than skipping these FMA candidates in widening_mul.  */
5559 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs
))),
5560 param_avoid_fma_max_bits
))
5562 /* Look for cross backedge dependency:
5563 1. LHS is a phi argument in the same basic block it is defined.
5564 2. And the result of the phi node is used in OPS. */
5565 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (lhs
));
5567 use_operand_p use_p
;
5568 imm_use_iterator iter
;
5569 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lhs
)
5570 if (gphi
*phi
= dyn_cast
<gphi
*> (USE_STMT (use_p
)))
5572 if (gimple_phi_arg_edge (phi
, phi_arg_index_from_use (use_p
))->src
5575 tree phi_result
= gimple_phi_result (phi
);
5578 FOR_EACH_VEC_ELT (*ops
, j
, oe
)
5580 if (TREE_CODE (oe
->op
) != SSA_NAME
)
5583 /* Result of phi is operand of PLUS_EXPR. */
5584 if (oe
->op
== phi_result
)
	  /* Check if the result of the phi is an operand of MULT_EXPR.  */
5588 gimple
*def_stmt
= SSA_NAME_DEF_STMT (oe
->op
);
5589 if (is_gimple_assign (def_stmt
)
5590 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
)
5592 tree rhs
= gimple_assign_rhs1 (def_stmt
);
5593 if (TREE_CODE (rhs
) == SSA_NAME
)
5595 if (rhs
== phi_result
)
5597 def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5600 if (is_gimple_assign (def_stmt
)
5601 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
)
5603 if (gimple_assign_rhs1 (def_stmt
) == phi_result
5604 || gimple_assign_rhs2 (def_stmt
) == phi_result
)
5614 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5615 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5616 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
/* Rewrite statements with a dependency chain with regard to the chance to
   generate FMA.
   For the chain with FMA: try to keep the fma opportunity as much as possible.
   For the chain without FMA: put the computation in rank order and try
   to allow operations to be executed in parallel.
   E.g.
     e + f + a * b + c * d;

   This reassociation approach preserves the chance of fma generation as much
   as possible.

   Another thing is to avoid adding loop-carried ops to long chains, otherwise
   the whole chain will have dependencies across the loop iteration.  Just keep
   loop-carried ops in a separate chain.
   E.g.
     x_1 = phi (x_0, x_2)
     y_1 = phi (y_0, y_2)

     a + b + c + d + e + x1 + y1

   where x1 and y1 are kept in their own, shorter chain.  */
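/* Illustrative outcome only (not necessarily the exact shape produced):
   with width 2 and no FMA in the chain, a sum a + b + c + d + e can be
   split into two parallel partial sums, e.g.

     t1 = a + b;  t2 = c + d;	(independent, can issue in parallel)
     t3 = t1 + t2;
     result = t3 + e;

   while a loop-carried operand such as x1 above is appended on its own
   chain so the backedge dependency does not serialize the whole sum.  */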
5650 rewrite_expr_tree_parallel (gassign
*stmt
, int width
, bool has_fma
,
5651 const vec
<operand_entry
*> &ops
)
5653 enum tree_code opcode
= gimple_assign_rhs_code (stmt
);
5654 int op_num
= ops
.length ();
5655 int op_normal_num
= op_num
;
5656 gcc_assert (op_num
> 0);
5657 int stmt_num
= op_num
- 1;
5658 gimple
**stmts
= XALLOCAVEC (gimple
*, stmt_num
);
5660 tree tmp_op
[2], op1
;
5662 gimple
*stmt1
= NULL
;
5663 tree last_rhs1
= gimple_assign_rhs1 (stmt
);
5664 int last_rhs1_stmt_index
= 0, last_rhs2_stmt_index
= 0;
5665 int width_active
= 0, width_count
= 0;
5666 bool has_biased
= false, ops_changed
= false;
5667 auto_vec
<operand_entry
*> ops_normal
;
5668 auto_vec
<operand_entry
*> ops_biased
;
5669 vec
<operand_entry
*> *ops1
;
5671 /* We start expression rewriting from the top statements.
5672 So, in this loop we create a full list of statements
5673 we will work with. */
5674 stmts
[stmt_num
- 1] = stmt
;
5675 for (i
= stmt_num
- 2; i
>= 0; i
--)
5676 stmts
[i
] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts
[i
+1]));
  /* Avoid adding loop-carried ops to long chains, first filter out the
     loop-carried.  But we need to make sure that the length of the remainder
     is not less than 4, which is the smallest ops length we can break the
     chain.  */
5682 FOR_EACH_VEC_ELT (ops
, i
, oe
)
5684 if (TREE_CODE (oe
->op
) == SSA_NAME
5685 && bitmap_bit_p (biased_names
, SSA_NAME_VERSION (oe
->op
))
5686 && op_normal_num
> 4)
5688 ops_biased
.safe_push (oe
);
5693 ops_normal
.safe_push (oe
);
  /* Width should not be larger than ops length / 2, since we can not create
     more parallel dependency chains than that.  */
5698 int width_normal
= op_normal_num
/ 2;
5699 int width_biased
= (op_num
- op_normal_num
) / 2;
5700 width_normal
= width
<= width_normal
? width
: width_normal
;
5701 width_biased
= width
<= width_biased
? width
: width_biased
;
5704 width_count
= width_active
= width_normal
;
5706 /* Build parallel dependency chain according to width. */
5707 for (i
= 0; i
< stmt_num
; i
++)
5709 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5711 fprintf (dump_file
, "Transforming ");
5712 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5715 /* When the work of normal ops is over, but the loop is not over,
5716 continue to do biased ops. */
5717 if (width_count
== 0 && ops1
== &ops_normal
)
5720 width_count
= width_active
= width_biased
;
5724 /* Swap the operands if no FMA in the chain. */
5725 if (ops1
->length () > 2 && !has_fma
)
5726 swap_ops_for_binary_stmt (*ops1
, ops1
->length () - 3);
5728 if (i
< width_active
5729 || (ops_changed
&& i
<= (last_rhs1_stmt_index
+ width_active
)))
5731 for (j
= 0; j
< 2; j
++)
	      /* If the stmt that defines operand has to be inserted, insert it
		 before the use.  */
5737 stmt1
= oe
->stmt_to_insert
;
5739 insert_stmt_before_use (stmts
[i
], stmt1
);
5742 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
),
5746 gimple_set_visited (stmts
[i
], true);
	  /* We keep original statement only for the last one.  All
	     others are recreated.  */
5753 if (!ops1
->length ())
5755 /* For biased length equal to 2. */
5756 if (width_count
== BIASED_END_STMT
&& !last_rhs2_stmt_index
)
5757 last_rhs2_stmt_index
= i
- 1;
5759 /* When width_count == 2 and there is no biased, just finish. */
5760 if (width_count
== NORMAL_END_STMT
&& !has_biased
)
5762 last_rhs1_stmt_index
= i
- 1;
5763 last_rhs2_stmt_index
= i
- 2;
5765 if (last_rhs1_stmt_index
&& (last_rhs2_stmt_index
|| !has_biased
))
5767 /* We keep original statement only for the last one. All
5768 others are recreated. */
5769 gimple_assign_set_rhs1 (stmts
[i
], gimple_assign_lhs
5770 (stmts
[last_rhs1_stmt_index
]));
5771 gimple_assign_set_rhs2 (stmts
[i
], gimple_assign_lhs
5772 (stmts
[last_rhs2_stmt_index
]));
5773 update_stmt (stmts
[i
]);
5778 build_and_add_sum (TREE_TYPE (last_rhs1
),
5779 gimple_assign_lhs (stmts
[i
-width_count
]),
5781 (stmts
[i
-width_count
+1]),
5783 gimple_set_visited (stmts
[i
], true);
	  /* It is the end of normal or biased ops.
	     last_rhs1_stmt_index is used to record the last stmt index
	     for normal ops.  last_rhs2_stmt_index is used to record the
	     last stmt index for biased ops.  */
5790 if (width_count
== BIASED_END_STMT
)
5792 gcc_assert (has_biased
);
5793 if (ops_biased
.length ())
5794 last_rhs1_stmt_index
= i
;
5796 last_rhs2_stmt_index
= i
;
      /* Attach the remaining ops to the parallel dependency chain.  */
5806 stmt1
= oe
->stmt_to_insert
;
5808 insert_stmt_before_use (stmts
[i
], stmt1
);
      /* For the case of only one biased op.  */
5812 if (width_count
== SPECIAL_BIASED_END_STMT
)
5814 /* We keep original statement only for the last one. All
5815 others are recreated. */
5816 gcc_assert (has_biased
);
5817 gimple_assign_set_rhs1 (stmts
[i
], gimple_assign_lhs
5818 (stmts
[last_rhs1_stmt_index
]));
5819 gimple_assign_set_rhs2 (stmts
[i
], op1
);
5820 update_stmt (stmts
[i
]);
5824 stmts
[i
] = build_and_add_sum (TREE_TYPE (last_rhs1
),
5826 (stmts
[i
-width_active
]),
5829 gimple_set_visited (stmts
[i
], true);
5834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5836 fprintf (dump_file
, " into ");
5837 print_gimple_stmt (dump_file
, stmts
[i
], 0);
5841 remove_visited_stmt_chain (last_rhs1
);
/* Transform STMT, which is really (A + B) + (C + D) into the left
   linear form, ((A + B) + C) + D.
   Recurse on D if necessary.  */
5849 linearize_expr (gimple
*stmt
)
5851 gimple_stmt_iterator gsi
;
5852 gimple
*binlhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
5853 gimple
*binrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5854 gimple
*oldbinrhs
= binrhs
;
5855 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
5856 gimple
*newbinrhs
= NULL
;
5857 class loop
*loop
= loop_containing_stmt (stmt
);
5858 tree lhs
= gimple_assign_lhs (stmt
);
5860 gcc_assert (is_reassociable_op (binlhs
, rhscode
, loop
)
5861 && is_reassociable_op (binrhs
, rhscode
, loop
));
5863 gsi
= gsi_for_stmt (stmt
);
5865 gimple_assign_set_rhs2 (stmt
, gimple_assign_rhs1 (binrhs
));
5866 binrhs
= gimple_build_assign (make_ssa_name (TREE_TYPE (lhs
)),
5867 gimple_assign_rhs_code (binrhs
),
5868 gimple_assign_lhs (binlhs
),
5869 gimple_assign_rhs2 (binrhs
));
5870 gimple_assign_set_rhs1 (stmt
, gimple_assign_lhs (binrhs
));
5871 gsi_insert_before (&gsi
, binrhs
, GSI_SAME_STMT
);
5872 gimple_set_uid (binrhs
, gimple_uid (stmt
));
5874 if (TREE_CODE (gimple_assign_rhs2 (stmt
)) == SSA_NAME
)
5875 newbinrhs
= SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt
));
5877 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5879 fprintf (dump_file
, "Linearized: ");
5880 print_gimple_stmt (dump_file
, stmt
, 0);
5883 reassociate_stats
.linearized
++;
5886 gsi
= gsi_for_stmt (oldbinrhs
);
5887 reassoc_remove_stmt (&gsi
);
5888 release_defs (oldbinrhs
);
5890 gimple_set_visited (stmt
, true);
5891 gimple_set_visited (binlhs
, true);
5892 gimple_set_visited (binrhs
, true);
5894 /* Tail recurse on the new rhs if it still needs reassociation. */
5895 if (newbinrhs
&& is_reassociable_op (newbinrhs
, rhscode
, loop
))
5896 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5897 want to change the algorithm while converting to tuples. */
5898 linearize_expr (stmt
);
5901 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5902 it. Otherwise, return NULL. */
5905 get_single_immediate_use (tree lhs
)
5907 use_operand_p immuse
;
5910 if (TREE_CODE (lhs
) == SSA_NAME
5911 && single_imm_use (lhs
, &immuse
, &immusestmt
)
5912 && is_gimple_assign (immusestmt
))
5918 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5919 representing the negated value. Insertions of any necessary
5920 instructions go before GSI.
5921 This function is recursive in that, if you hand it "a_5" as the
5922 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5923 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5926 negate_value (tree tonegate
, gimple_stmt_iterator
*gsip
)
5928 gimple
*negatedefstmt
= NULL
;
5929 tree resultofnegate
;
5930 gimple_stmt_iterator gsi
;
5933 /* If we are trying to negate a name, defined by an add, negate the
5934 add operands instead. */
5935 if (TREE_CODE (tonegate
) == SSA_NAME
)
5936 negatedefstmt
= SSA_NAME_DEF_STMT (tonegate
);
5937 if (TREE_CODE (tonegate
) == SSA_NAME
5938 && is_gimple_assign (negatedefstmt
)
5939 && TREE_CODE (gimple_assign_lhs (negatedefstmt
)) == SSA_NAME
5940 && has_single_use (gimple_assign_lhs (negatedefstmt
))
5941 && gimple_assign_rhs_code (negatedefstmt
) == PLUS_EXPR
)
5943 tree rhs1
= gimple_assign_rhs1 (negatedefstmt
);
5944 tree rhs2
= gimple_assign_rhs2 (negatedefstmt
);
5945 tree lhs
= gimple_assign_lhs (negatedefstmt
);
5948 gsi
= gsi_for_stmt (negatedefstmt
);
5949 rhs1
= negate_value (rhs1
, &gsi
);
5951 gsi
= gsi_for_stmt (negatedefstmt
);
5952 rhs2
= negate_value (rhs2
, &gsi
);
5954 gsi
= gsi_for_stmt (negatedefstmt
);
5955 lhs
= make_ssa_name (TREE_TYPE (lhs
));
5956 gimple_set_visited (negatedefstmt
, true);
5957 g
= gimple_build_assign (lhs
, PLUS_EXPR
, rhs1
, rhs2
);
5958 gimple_set_uid (g
, gimple_uid (negatedefstmt
));
5959 gsi_insert_before (&gsi
, g
, GSI_SAME_STMT
);
5963 tonegate
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (tonegate
), tonegate
);
5964 resultofnegate
= force_gimple_operand_gsi (gsip
, tonegate
, true,
5965 NULL_TREE
, true, GSI_SAME_STMT
);
5967 uid
= gimple_uid (gsi_stmt (gsi
));
5968 for (gsi_prev (&gsi
); !gsi_end_p (gsi
); gsi_prev (&gsi
))
5970 gimple
*stmt
= gsi_stmt (gsi
);
5971 if (gimple_uid (stmt
) != 0)
5973 gimple_set_uid (stmt
, uid
);
5975 return resultofnegate
;
/* Return true if we should break up the subtract in STMT into an add
   with negate.  This is true when the subtract operands are really
   adds, or the subtract itself is used in an add expression.  In
   either case, breaking up the subtract into an add with negate
   exposes the adds to reassociation.  */
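/* For example (illustrative SSA names): for

     c_1 = a_2 - b_3;
     d_4 = c_1 + e_5;

   breaking the subtract into

     t_6 = -b_3;
     c_1 = a_2 + t_6;

   lets the whole addition chain a_2 + t_6 + e_5 be reassociated.  */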
5985 should_break_up_subtract (gimple
*stmt
)
5987 tree lhs
= gimple_assign_lhs (stmt
);
5988 tree binlhs
= gimple_assign_rhs1 (stmt
);
5989 tree binrhs
= gimple_assign_rhs2 (stmt
);
5991 class loop
*loop
= loop_containing_stmt (stmt
);
5993 if (TREE_CODE (binlhs
) == SSA_NAME
5994 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs
), PLUS_EXPR
, loop
))
5997 if (TREE_CODE (binrhs
) == SSA_NAME
5998 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
), PLUS_EXPR
, loop
))
6001 if (TREE_CODE (lhs
) == SSA_NAME
6002 && (immusestmt
= get_single_immediate_use (lhs
))
6003 && is_gimple_assign (immusestmt
)
6004 && (gimple_assign_rhs_code (immusestmt
) == PLUS_EXPR
6005 || (gimple_assign_rhs_code (immusestmt
) == MINUS_EXPR
6006 && gimple_assign_rhs1 (immusestmt
) == lhs
)
6007 || gimple_assign_rhs_code (immusestmt
) == MULT_EXPR
))
6012 /* Transform STMT from A - B into A + -B. */
6015 break_up_subtract (gimple
*stmt
, gimple_stmt_iterator
*gsip
)
6017 tree rhs1
= gimple_assign_rhs1 (stmt
);
6018 tree rhs2
= gimple_assign_rhs2 (stmt
);
6020 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6022 fprintf (dump_file
, "Breaking up subtract ");
6023 print_gimple_stmt (dump_file
, stmt
, 0);
6026 rhs2
= negate_value (rhs2
, gsip
);
6027 gimple_assign_set_rhs_with_ops (gsip
, PLUS_EXPR
, rhs1
, rhs2
);
6031 /* Determine whether STMT is a builtin call that raises an SSA name
6032 to an integer power and has only one use. If so, and this is early
6033 reassociation and unsafe math optimizations are permitted, place
6034 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
6035 If any of these conditions does not hold, return FALSE. */
6038 acceptable_pow_call (gcall
*stmt
, tree
*base
, HOST_WIDE_INT
*exponent
)
6041 REAL_VALUE_TYPE c
, cint
;
6043 switch (gimple_call_combined_fn (stmt
))
6046 if (flag_errno_math
)
6049 *base
= gimple_call_arg (stmt
, 0);
6050 arg1
= gimple_call_arg (stmt
, 1);
6052 if (TREE_CODE (arg1
) != REAL_CST
)
6055 c
= TREE_REAL_CST (arg1
);
6057 if (REAL_EXP (&c
) > HOST_BITS_PER_WIDE_INT
)
6060 *exponent
= real_to_integer (&c
);
6061 real_from_integer (&cint
, VOIDmode
, *exponent
, SIGNED
);
6062 if (!real_identical (&c
, &cint
))
6068 *base
= gimple_call_arg (stmt
, 0);
6069 arg1
= gimple_call_arg (stmt
, 1);
6071 if (!tree_fits_shwi_p (arg1
))
6074 *exponent
= tree_to_shwi (arg1
);
6081 /* Expanding negative exponents is generally unproductive, so we don't
6082 complicate matters with those. Exponents of zero and one should
6083 have been handled by expression folding. */
6084 if (*exponent
< 2 || TREE_CODE (*base
) != SSA_NAME
)
/* Try to derive and add operand entry for OP to *OPS.  Return false if
   unsuccessful.  */
6094 try_special_add_to_ops (vec
<operand_entry
*> *ops
,
6095 enum tree_code code
,
6096 tree op
, gimple
* def_stmt
)
6098 tree base
= NULL_TREE
;
6099 HOST_WIDE_INT exponent
= 0;
6101 if (TREE_CODE (op
) != SSA_NAME
6102 || ! has_single_use (op
))
6105 if (code
== MULT_EXPR
6106 && reassoc_insert_powi_p
6107 && flag_unsafe_math_optimizations
6108 && is_gimple_call (def_stmt
)
6109 && acceptable_pow_call (as_a
<gcall
*> (def_stmt
), &base
, &exponent
))
6111 add_repeat_to_ops_vec (ops
, base
, exponent
);
6112 gimple_set_visited (def_stmt
, true);
6115 else if (code
== MULT_EXPR
6116 && is_gimple_assign (def_stmt
)
6117 && gimple_assign_rhs_code (def_stmt
) == NEGATE_EXPR
6118 && !HONOR_SNANS (TREE_TYPE (op
))
6119 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op
))
6120 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op
)))
6121 && (!FLOAT_TYPE_P (TREE_TYPE (op
))
6122 || !DECIMAL_FLOAT_MODE_P (element_mode (op
))))
6124 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6125 tree cst
= build_minus_one_cst (TREE_TYPE (op
));
6126 add_to_ops_vec (ops
, rhs1
);
6127 add_to_ops_vec (ops
, cst
);
6128 gimple_set_visited (def_stmt
, true);
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.  */
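/* For illustration only (made-up SSA names): if STMT is

     _5 = _1 + _2;

   and both _1 and _2 are themselves single-use reassociable PLUS_EXPRs,
   the routine recurses through their definitions and ends up pushing the
   leaf operands (or the special entries produced by
   try_special_add_to_ops) onto OPS, marking the visited statements so
   they are not processed again.  */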
6139 linearize_expr_tree (vec
<operand_entry
*> *ops
, gimple
*stmt
,
6140 bool is_associative
, bool set_visited
)
6142 tree binlhs
= gimple_assign_rhs1 (stmt
);
6143 tree binrhs
= gimple_assign_rhs2 (stmt
);
6144 gimple
*binlhsdef
= NULL
, *binrhsdef
= NULL
;
6145 bool binlhsisreassoc
= false;
6146 bool binrhsisreassoc
= false;
6147 enum tree_code rhscode
= gimple_assign_rhs_code (stmt
);
6148 class loop
*loop
= loop_containing_stmt (stmt
);
6151 gimple_set_visited (stmt
, true);
6153 if (TREE_CODE (binlhs
) == SSA_NAME
)
6155 binlhsdef
= SSA_NAME_DEF_STMT (binlhs
);
6156 binlhsisreassoc
= (is_reassociable_op (binlhsdef
, rhscode
, loop
)
6157 && !stmt_could_throw_p (cfun
, binlhsdef
));
6160 if (TREE_CODE (binrhs
) == SSA_NAME
)
6162 binrhsdef
= SSA_NAME_DEF_STMT (binrhs
);
6163 binrhsisreassoc
= (is_reassociable_op (binrhsdef
, rhscode
, loop
)
6164 && !stmt_could_throw_p (cfun
, binrhsdef
));
  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */
6173 if (!binlhsisreassoc
)
      /* If this is not an associative operation like division, give up.  */
6176 if (!is_associative
)
6178 add_to_ops_vec (ops
, binrhs
);
6182 if (!binrhsisreassoc
)
6185 if (try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
	    /* If we add ops for the rhs we expect to be able to recurse
	       to it via the lhs during expression rewrite so swap
	       operands.  */
6191 add_to_ops_vec (ops
, binrhs
);
6193 if (!try_special_add_to_ops (ops
, rhscode
, binlhs
, binlhsdef
))
6194 add_to_ops_vec (ops
, binlhs
);
6200 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6202 fprintf (dump_file
, "swapping operands of ");
6203 print_gimple_stmt (dump_file
, stmt
, 0);
6206 swap_ssa_operands (stmt
,
6207 gimple_assign_rhs1_ptr (stmt
),
6208 gimple_assign_rhs2_ptr (stmt
));
6211 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6213 fprintf (dump_file
, " is now ");
6214 print_gimple_stmt (dump_file
, stmt
, 0);
6216 if (!binrhsisreassoc
)
      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
6221 std::swap (binlhs
, binrhs
);
6223 else if (binrhsisreassoc
)
6225 linearize_expr (stmt
);
6226 binlhs
= gimple_assign_rhs1 (stmt
);
6227 binrhs
= gimple_assign_rhs2 (stmt
);
6230 gcc_assert (TREE_CODE (binrhs
) != SSA_NAME
6231 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs
),
6233 linearize_expr_tree (ops
, SSA_NAME_DEF_STMT (binlhs
),
6234 is_associative
, set_visited
);
6236 if (!try_special_add_to_ops (ops
, rhscode
, binrhs
, binrhsdef
))
6237 add_to_ops_vec (ops
, binrhs
);
6240 /* Repropagate the negates back into subtracts, since no other pass
6241 currently does it. */
6244 repropagate_negates (void)
6249 FOR_EACH_VEC_ELT (plus_negates
, i
, negate
)
6251 gimple
*user
= get_single_immediate_use (negate
);
6252 if (!user
|| !is_gimple_assign (user
))
6255 tree negateop
= gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate
));
6256 if (TREE_CODE (negateop
) == SSA_NAME
6257 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop
))
6260 /* The negate operand can be either operand of a PLUS_EXPR
6261 (it can be the LHS if the RHS is a constant for example).
6263 Force the negate operand to the RHS of the PLUS_EXPR, then
6264 transform the PLUS_EXPR into a MINUS_EXPR. */
6265 if (gimple_assign_rhs_code (user
) == PLUS_EXPR
)
6267 /* If the negated operand appears on the LHS of the
6268 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6269 to force the negated operand to the RHS of the PLUS_EXPR. */
6270 if (gimple_assign_rhs1 (user
) == negate
)
6272 swap_ssa_operands (user
,
6273 gimple_assign_rhs1_ptr (user
),
6274 gimple_assign_rhs2_ptr (user
));
6277 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6278 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6279 if (gimple_assign_rhs2 (user
) == negate
)
6281 tree rhs1
= gimple_assign_rhs1 (user
);
6282 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
6283 gimple_assign_set_rhs_with_ops (&gsi
, MINUS_EXPR
, rhs1
,
6288 else if (gimple_assign_rhs_code (user
) == MINUS_EXPR
)
6290 if (gimple_assign_rhs1 (user
) == negate
)
6295 which we transform into
6298 This pushes down the negate which we possibly can merge
6299 into some other operation, hence insert it into the
6300 plus_negates vector. */
6301 gimple
*feed
= SSA_NAME_DEF_STMT (negate
);
6302 tree b
= gimple_assign_rhs2 (user
);
6303 gimple_stmt_iterator gsi
= gsi_for_stmt (feed
);
6304 gimple_stmt_iterator gsi2
= gsi_for_stmt (user
);
6305 tree x
= make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed
)));
6306 gimple
*g
= gimple_build_assign (x
, PLUS_EXPR
, negateop
, b
);
6307 gsi_insert_before (&gsi2
, g
, GSI_SAME_STMT
);
6308 gimple_assign_set_rhs_with_ops (&gsi2
, NEGATE_EXPR
, x
);
6309 user
= gsi_stmt (gsi2
);
6311 reassoc_remove_stmt (&gsi
);
6312 release_defs (feed
);
6313 plus_negates
.safe_push (gimple_assign_lhs (user
));
6317 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6318 getting rid of one operation. */
6319 tree rhs1
= gimple_assign_rhs1 (user
);
6320 gimple_stmt_iterator gsi
= gsi_for_stmt (user
);
6321 gimple_assign_set_rhs_with_ops (&gsi
, PLUS_EXPR
, rhs1
, negateop
);
6322 update_stmt (gsi_stmt (gsi
));
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
     b = c - d
     q = b - r
     k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement
   and set UIDs within each basic block.  */
6347 break_up_subtract_bb (basic_block bb
)
6349 gimple_stmt_iterator gsi
;
6350 unsigned int uid
= 1;
6352 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
6354 gimple
*stmt
= gsi_stmt (gsi
);
6355 gimple_set_visited (stmt
, false);
6356 gimple_set_uid (stmt
, uid
++);
6358 if (!is_gimple_assign (stmt
)
6359 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt
)))
6360 || !can_reassociate_op_p (gimple_assign_lhs (stmt
)))
6363 /* Look for simple gimple subtract operations. */
6364 if (gimple_assign_rhs_code (stmt
) == MINUS_EXPR
)
6366 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt
))
6367 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt
)))
6370 /* Check for a subtract used only in an addition. If this
6371 is the case, transform it into add of a negate for better
6372 reassociation. IE transform C = A-B into C = A + -B if C
6373 is only used in an addition. */
6374 if (should_break_up_subtract (stmt
))
6375 break_up_subtract (stmt
, &gsi
);
6377 else if (gimple_assign_rhs_code (stmt
) == NEGATE_EXPR
6378 && can_reassociate_op_p (gimple_assign_rhs1 (stmt
)))
6379 plus_negates
.safe_push (gimple_assign_lhs (stmt
));
6383 /* Used for repeated factor analysis. */
6384 struct repeat_factor
6386 /* An SSA name that occurs in a multiply chain. */
6389 /* Cached rank of the factor. */
6392 /* Number of occurrences of the factor in the chain. */
6393 HOST_WIDE_INT count
;
6395 /* An SSA name representing the product of this factor and
6396 all factors appearing later in the repeated factor vector. */
6401 static vec
<repeat_factor
> repeat_factor_vec
;
/* Used for sorting the repeat factor vector.  Sort primarily by
   ascending occurrence count, secondarily by descending rank.  */

static int
compare_repeat_factors (const void *x1, const void *x2)
{
  const repeat_factor *rf1 = (const repeat_factor *) x1;
  const repeat_factor *rf2 = (const repeat_factor *) x2;

  if (rf1->count < rf2->count)
    return -1;
  else if (rf1->count > rf2->count)
    return 1;

  if (rf1->rank < rf2->rank)
    return 1;
  else if (rf1->rank > rf2->rank)
    return -1;

  return 0;
}

/* Look for repeated operands in OPS in the multiply tree rooted at
   STMT.  Replace them with an optimal sequence of multiplies and powi
   builtin calls, and remove the used operands from OPS.  Return an
   SSA name representing the value of the replacement sequence.  */

static tree
attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
{
  unsigned i, j, vec_len;
  int ii;
  operand_entry *oe;
  repeat_factor *rf1, *rf2;
  repeat_factor rfnew;
  tree result = NULL_TREE;
  tree target_ssa, iter_result;
  tree type = TREE_TYPE (gimple_get_lhs (stmt));
  tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  gimple *mul_stmt, *pow_stmt;

  /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
     target, unless type is integral.  */
  if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
    return NULL_TREE;

  /* Allocate the repeated factor vector.  */
  repeat_factor_vec.create (10);

  /* Scan the OPS vector for all SSA names in the product and build
     up a vector of occurrence counts for each factor.  */
  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->factor == oe->op)
		{
		  rf1->count += oe->count;
		  break;
		}
	    }

	  if (j >= repeat_factor_vec.length ())
	    {
	      rfnew.factor = oe->op;
	      rfnew.rank = oe->rank;
	      rfnew.count = oe->count;
	      rfnew.repr = NULL_TREE;
	      repeat_factor_vec.safe_push (rfnew);
	    }
	}
    }

  /* Sort the repeated factor vector by (a) increasing occurrence count,
     and (b) decreasing rank.  */
  repeat_factor_vec.qsort (compare_repeat_factors);

  /* It is generally best to combine as many base factors as possible
     into a product before applying __builtin_powi to the result.
     However, the sort order chosen for the repeated factor vector
     allows us to cache partial results for the product of the base
     factors for subsequent use.  When we already have a cached partial
     result from a previous iteration, it is best to make use of it
     before looking for another __builtin_pow opportunity.

     As an example, consider x * x * y * y * y * z * z * z * z.
     We want to first compose the product x * y * z, raise it to the
     second power, then multiply this by y * z, and finally multiply
     by z.  This can be done in 5 multiplies provided we cache y * z
     for use in both expressions:

       t1 = y * z
       t2 = t1 * x
       t3 = t2 * t2
       t4 = t1 * t3
       result = t4 * z

     If we instead ignored the cached y * z and first multiplied by
     the __builtin_pow opportunity z * z, we would get the inferior:

       t1 = y * z
       t2 = t1 * x
       t3 = t2 * t2
       t4 = z * z
       t5 = t3 * t4
       result = t5 * y  */

  vec_len = repeat_factor_vec.length ();

  /* Repeatedly look for opportunities to create a builtin_powi call.  */
  while (true)
    {
      HOST_WIDE_INT power;

      /* First look for the largest cached product of factors from
	 preceding iterations.  If found, create a builtin_powi for
	 it if the minimum occurrence count for its factors is at
	 least 2, or just use this cached product as our next
	 multiplicand if the minimum occurrence count is 1.  */
      FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	{
	  if (rf1->repr && rf1->count > 0)
	    break;
	}

      if (j < vec_len)
	{
	  power = rf1->count;

	  if (power == 1)
	    {
	      iter_result = rf1->repr;

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Multiplying by cached product ", dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fputs ("\n", dump_file);
		}
	    }
	  else
	    {
	      if (INTEGRAL_TYPE_P (type))
		{
		  gcc_assert (power > 1);
		  gimple_stmt_iterator gsip = gsi;
		  gsi_prev (&gsip);
		  iter_result = powi_as_mults (&gsi, gimple_location (stmt),
					       rf1->repr, power);
		  gimple_stmt_iterator gsic = gsi;
		  while (gsi_stmt (gsic) != gsi_stmt (gsip))
		    {
		      gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
		      gimple_set_visited (gsi_stmt (gsic), true);
		      gsi_prev (&gsic);
		    }
		}
	      else
		{
		  iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
		  pow_stmt
		    = gimple_build_call (powi_fndecl, 2, rf1->repr,
					 build_int_cst (integer_type_node,
							power));
		  gimple_call_set_lhs (pow_stmt, iter_result);
		  gimple_set_location (pow_stmt, gimple_location (stmt));
		  gimple_set_uid (pow_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
		}

	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  unsigned elt;
		  repeat_factor *rf;
		  fputs ("Building __builtin_pow call for cached product (",
			 dump_file);
		  for (elt = j; elt < vec_len; elt++)
		    {
		      rf = &repeat_factor_vec[elt];
		      print_generic_expr (dump_file, rf->factor);
		      if (elt < vec_len - 1)
			fputs (" * ", dump_file);
		    }
		  fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n",
			   power);
		}
	    }
	}
      else
	{
	  /* Otherwise, find the first factor in the repeated factor
	     vector whose occurrence count is at least 2.  If no such
	     factor exists, there are no builtin_powi opportunities
	     remaining.  */
	  FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
	    {
	      if (rf1->count >= 2)
		break;
	    }

	  if (j >= vec_len)
	    break;

	  power = rf1->count;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      unsigned elt;
	      repeat_factor *rf;
	      fputs ("Building __builtin_pow call for (", dump_file);
	      for (elt = j; elt < vec_len; elt++)
		{
		  rf = &repeat_factor_vec[elt];
		  print_generic_expr (dump_file, rf->factor);
		  if (elt < vec_len - 1)
		    fputs (" * ", dump_file);
		}
	      fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC "\n", power);
	    }

	  reassociate_stats.pows_created++;

	  /* Visit each element of the vector in reverse order (so that
	     high-occurrence elements are visited first, and within the
	     same occurrence count, lower-ranked elements are visited
	     first).  Form a linear product of all elements in this order
	     whose occurrence count is at least that of element J.
	     Record the SSA name representing the product of each element
	     with all subsequent elements in the vector.  */
	  if (j == vec_len - 1)
	    rf1->repr = rf1->factor;
	  else
	    {
	      for (ii = vec_len - 2; ii >= (int) j; ii--)
		{
		  tree op1, op2;

		  rf1 = &repeat_factor_vec[ii];
		  rf2 = &repeat_factor_vec[ii + 1];

		  /* Init the last factor's representative to be itself.  */
		  if (!rf2->repr)
		    rf2->repr = rf2->factor;

		  op1 = rf1->factor;
		  op2 = rf2->repr;

		  target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
		  mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
						  op1, op2);
		  gimple_set_location (mul_stmt, gimple_location (stmt));
		  gimple_set_uid (mul_stmt, gimple_uid (stmt));
		  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
		  rf1->repr = target_ssa;

		  /* Don't reprocess the multiply we just introduced.  */
		  gimple_set_visited (mul_stmt, true);
		}
	    }

	  /* Form a call to __builtin_powi for the maximum product
	     just formed, raised to the power obtained earlier.  */
	  rf1 = &repeat_factor_vec[j];
	  if (INTEGRAL_TYPE_P (type))
	    {
	      gcc_assert (power > 1);
	      gimple_stmt_iterator gsip = gsi;
	      gsi_prev (&gsip);
	      iter_result = powi_as_mults (&gsi, gimple_location (stmt),
					   rf1->repr, power);
	      gimple_stmt_iterator gsic = gsi;
	      while (gsi_stmt (gsic) != gsi_stmt (gsip))
		{
		  gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
		  gimple_set_visited (gsi_stmt (gsic), true);
		  gsi_prev (&gsic);
		}
	    }
	  else
	    {
	      iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
	      pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
					    build_int_cst (integer_type_node,
							   power));
	      gimple_call_set_lhs (pow_stmt, iter_result);
	      gimple_set_location (pow_stmt, gimple_location (stmt));
	      gimple_set_uid (pow_stmt, gimple_uid (stmt));
	      gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
	    }
	}

      /* If we previously formed at least one other builtin_powi call,
	 form the product of this one and those others.  */
      if (result)
	{
	  tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
	  mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
					  result, iter_result);
	  gimple_set_location (mul_stmt, gimple_location (stmt));
	  gimple_set_uid (mul_stmt, gimple_uid (stmt));
	  gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
	  gimple_set_visited (mul_stmt, true);
	  result = new_result;
	}
      else
	result = iter_result;

      /* Decrement the occurrence count of each element in the product
	 by the count found above, and remove this many copies of each
	 factor from OPS.  */
      for (i = j; i < vec_len; i++)
	{
	  unsigned k = power;
	  unsigned n;

	  rf1 = &repeat_factor_vec[i];
	  rf1->count -= power;

	  FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
	    {
	      if (oe->op == rf1->factor)
		{
		  if (oe->count <= k)
		    {
		      ops->ordered_remove (n);
		      k -= oe->count;

		      if (k == 0)
			break;
		    }
		  else
		    {
		      oe->count -= k;
		      break;
		    }
		}
	    }
	}
    }

  /* At this point all elements in the repeated factor vector have a
     remaining occurrence count of 0 or 1, and those with a count of 1
     don't have cached representatives.  Re-sort the ops vector and
     clean up.  */
  ops->qsort (sort_by_operand_rank);
  repeat_factor_vec.release ();

  /* Return the final product computed herein.  Note that there may
     still be some elements with single occurrence count left in OPS;
     those will be handled by the normal reassociation logic.  */
  return result;
}
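
/* For illustration only, walking the loop above by hand rather than quoting a
   dump: for x * x * y * y * y * z * z * z * z the counts are x:2, y:3, z:4.
   The first pass builds the cached products z, y * z and x * y * z, raises
   x * y * z to the power 2 (via __builtin_powi or, for integral types, a
   multiply sequence), and then later passes multiply in the cached y * z and
   the leftover z, matching the 5-multiply sequence in the comment above.  */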

/* Attempt to optimize
   CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
   CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0.  */

static void
attempt_builtin_copysign (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int length = ops->length ();
  tree cst = ops->last ()->op;

  if (length == 1 || TREE_CODE (cst) != REAL_CST)
    return;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME
	  && has_single_use (oe->op))
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
	  if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
	    {
	      tree arg0, arg1;
	      switch (gimple_call_combined_fn (old_call))
		{
		CASE_CFN_COPYSIGN:
		CASE_CFN_COPYSIGN_FN:
		  arg0 = gimple_call_arg (old_call, 0);
		  arg1 = gimple_call_arg (old_call, 1);
		  /* The first argument of copysign must be a constant,
		     otherwise there's nothing to do.  */
		  if (TREE_CODE (arg0) == REAL_CST)
		    {
		      tree type = TREE_TYPE (arg0);
		      tree mul = const_binop (MULT_EXPR, type, cst, arg0);
		      /* If we couldn't fold to a single constant, skip it.
			 That happens e.g. for inexact multiplication when
			 -frounding-math.  */
		      if (mul == NULL_TREE)
			break;
		      /* Instead of adjusting OLD_CALL, let's build a new
			 call to not leak the LHS and prevent keeping bogus
			 debug statements.  DCE will clean up the old call.  */
		      gcall *new_call;
		      if (gimple_call_internal_p (old_call))
			new_call = gimple_build_call_internal
			  (IFN_COPYSIGN, 2, mul, arg1);
		      else
			new_call = gimple_build_call
			  (gimple_call_fndecl (old_call), 2, mul, arg1);
		      tree lhs = make_ssa_name (type);
		      gimple_call_set_lhs (new_call, lhs);
		      gimple_set_location (new_call,
					   gimple_location (old_call));
		      insert_stmt_after (new_call, old_call);
		      /* We've used the constant, get rid of it.  */
		      ops->pop ();
		      bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
		      /* Handle the CST1 < 0 case by negating the result.  */
		      if (cst1_neg)
			{
			  tree negrhs = make_ssa_name (TREE_TYPE (lhs));
			  gimple *negate_stmt
			    = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
			  insert_stmt_after (negate_stmt, new_call);
			  oe->op = negrhs;
			}
		      else
			oe->op = lhs;
		      if (dump_file && (dump_flags & TDF_DETAILS))
			{
			  fprintf (dump_file, "Optimizing copysign: ");
			  print_generic_expr (dump_file, cst);
			  fprintf (dump_file, " * COPYSIGN (");
			  print_generic_expr (dump_file, arg0);
			  fprintf (dump_file, ", ");
			  print_generic_expr (dump_file, arg1);
			  fprintf (dump_file, ") into %sCOPYSIGN (",
				   cst1_neg ? "-" : "");
			  print_generic_expr (dump_file, mul);
			  fprintf (dump_file, ", ");
			  print_generic_expr (dump_file, arg1);
			  fprintf (dump_file, ")\n");
			}
		      return;
		    }
		  break;
		default:
		  break;
		}
	    }
	}
    }
}
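
/* For illustration only: in a multiply chain whose constant operand is -2.0
   and which also contains t = copysign (3.0, y) with t used only in the
   chain, the code above builds copysign (-2.0 * 3.0, y), negates it because
   the constant is negative, drops the -2.0 from OPS and replaces t's operand
   entry with the negated result.  */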

/* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS.  */

static void
transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
{
  tree rhs1;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  rhs1 = gimple_assign_rhs1 (stmt);
  gimple_assign_set_rhs_from_tree (gsi, new_rhs);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Transform STMT at *GSI into a multiply of RHS1 and RHS2.  */

static void
transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
			    tree rhs1, tree rhs2)
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Transforming ");
      print_gimple_stmt (dump_file, stmt, 0);
    }

  gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
  update_stmt (gsi_stmt (*gsi));
  remove_visited_stmt_chain (rhs1);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " into ");
      print_gimple_stmt (dump_file, stmt, 0);
    }
}

/* Rearranging the ops may expose more FMAs when the chain has more than
   two FMAs.  Put no-mult ops and mult ops alternately at the end of the
   queue, which is conducive to generating more FMAs and reducing the loss
   of FMAs when breaking the chain.
   E.g.
   a * b + c * d + e generates:

   _4  = c_9(D) * d_10(D);
   _12 = .FMA (a_7(D), b_8(D), _4);
   _11 = e_6(D) + _12;

   Rearrange ops to -> e + a * b + c * d generates:

   _4  = .FMA (c_7(D), d_8(D), _3);
   _11 = .FMA (a_5(D), b_6(D), _4);

   Return the number of MULT_EXPRs in the chain.  */
static int
rank_ops_for_fma (vec<operand_entry *> *ops)
{
  operand_entry *oe;
  unsigned int i;
  unsigned int ops_length = ops->length ();
  auto_vec<operand_entry *> ops_mult;
  auto_vec<operand_entry *> ops_others;

  FOR_EACH_VEC_ELT (*ops, i, oe)
    {
      if (TREE_CODE (oe->op) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
	  if (is_gimple_assign (def_stmt))
	    {
	      if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
		ops_mult.safe_push (oe);
	      /* A negate on the multiplication leads to FNMA.  */
	      else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
		       && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
		{
		  gimple *neg_def_stmt
		    = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
		  if (is_gimple_assign (neg_def_stmt)
		      && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
		      && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
		    ops_mult.safe_push (oe);
		  else
		    ops_others.safe_push (oe);
		}
	      else
		ops_others.safe_push (oe);
	    }
	  else
	    ops_others.safe_push (oe);
	}
      else
	ops_others.safe_push (oe);
    }

  /* 1. When ops_mult.length == 2, like the following case,

       a * b + c * d + e,

     we need to rearrange the ops.

     Putting the ops that are not defined by a mult in front can generate
     more FMAs.

     2. If all ops are defined with mult, we don't need to rearrange them.  */
  unsigned mult_num = ops_mult.length ();
  if (mult_num >= 2 && mult_num != ops_length)
    {
      /* Put no-mult ops and mult ops alternately at the end of the
	 queue, which is conducive to generating more FMA and reducing the
	 loss of FMA when breaking the chain.  */
      ops->truncate (0);
      ops->splice (ops_mult);
      int j, opindex = ops->length ();
      int others_length = ops_others.length ();
      for (j = 0; j < others_length; j++)
	{
	  oe = ops_others.pop ();
	  ops->quick_insert (opindex, oe);
	  if (opindex > 0)
	    opindex--;
	}
    }
  return mult_num;
}
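
/* For illustration only, a reading of the loop above rather than output from
   a dump: with two mult-defined operands m1, m2 and two other operands x, y,
   the vector is rebuilt as { m1, x, m2, y }, i.e. non-mult and mult operands
   alternate toward the end of the queue so that each multiply candidate has a
   plain addend available to fuse with when the chain is rewritten.  */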

/* Reassociate expressions in basic block BB and its post-dominator as
   children.

   Bubble up return status from maybe_optimize_range_tests.  */

static bool
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple *stmt = last_nondebug_stmt (bb);
  bool cfg_cleanup_needed = false;

  if (stmt && !gimple_visited_p (stmt))
    cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);

  bool do_prev = false;
  for (gsi = gsi_last_bb (bb);
       !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
    {
      do_prev = true;
      stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt)
	  && !stmt_could_throw_p (cfun, stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  reassoc_remove_stmt (&gsi);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case and make sure
		     we don't do gsi_prev in that case.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      do_prev = false;
		    }
		}
	      continue;
	    }

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* For non-bit or min/max operations we can't associate
	     all types.  Verify that here.  */
	  if ((rhs_code != BIT_IOR_EXPR
	       && rhs_code != BIT_AND_EXPR
	       && rhs_code != BIT_XOR_EXPR
	       && rhs_code != MIN_EXPR
	       && rhs_code != MAX_EXPR
	       && !can_reassociate_type_p (TREE_TYPE (lhs)))
	      || !can_reassociate_op_p (rhs1)
	      || !can_reassociate_op_p (rhs2))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      auto_vec<operand_entry *> ops;
	      tree powi_result = NULL_TREE;
	      bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      ops.qsort (sort_by_operand_rank);
	      int orig_len = ops.length ();
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}
	      if (undistribute_bitref_for_vector (rhs_code, &ops,
						  loop_containing_stmt (stmt)))
		{
		  ops.qsort (sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}
	      if (rhs_code == PLUS_EXPR
		  && transform_add_to_multiply (&ops))
		ops.qsort (sort_by_operand_rank);

	      if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
		{
		  if (is_vector)
		    optimize_vec_cond_expr (rhs_code, &ops);
		  else
		    optimize_range_tests (rhs_code, &ops, NULL);
		}

	      if (rhs_code == MULT_EXPR && !is_vector)
		{
		  attempt_builtin_copysign (&ops);

		  if (reassoc_insert_powi_p
		      && (flag_unsafe_math_optimizations
			  || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
		    powi_result = attempt_builtin_powi (stmt, &ops);
		}

	      operand_entry *last;
	      bool negate_result = false;
	      if (ops.length () > 1
		  && rhs_code == MULT_EXPR)
		{
		  last = ops.last ();
		  if ((integer_minus_onep (last->op)
		       || real_minus_onep (last->op))
		      && !HONOR_SNANS (TREE_TYPE (lhs))
		      && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
			  || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
		    {
		      ops.pop ();
		      negate_result = true;
		    }
		}

	      tree new_lhs = lhs;
	      /* If the operand vector is now empty, all operands were
		 consumed by the __builtin_powi optimization.  */
	      if (ops.length () == 0)
		transform_stmt_to_copy (&gsi, stmt, powi_result);
	      else if (ops.length () == 1)
		{
		  tree last_op = ops.last ()->op;

		  /* If the stmt that defines operand has to be inserted, insert it
		     before the use.  */
		  if (ops.last ()->stmt_to_insert)
		    insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
		  if (powi_result)
		    transform_stmt_to_multiply (&gsi, stmt, last_op,
						powi_result);
		  else
		    transform_stmt_to_copy (&gsi, stmt, last_op);
		}
	      else
		{
		  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
		  int ops_num = ops.length ();
		  int width = 0;
		  int mult_num = 0;

		  /* For binary bit operations, if there are at least 3
		     operands and the last operand in OPS is a constant,
		     move it to the front.  This helps ensure that we generate
		     (X & Y) & C rather than (X & C) & Y.  The former will
		     often match a canonical bit test when we get to RTL.  */
		  if (ops.length () > 2
		      && (rhs_code == BIT_AND_EXPR
			  || rhs_code == BIT_IOR_EXPR
			  || rhs_code == BIT_XOR_EXPR)
		      && TREE_CODE (ops.last ()->op) == INTEGER_CST)
		    std::swap (*ops[0], *ops[ops_num - 1]);

		  optimization_type opt_type = bb_optimization_type (bb);

		  /* If the target supports FMA, rank_ops_for_fma will detect
		     if the chain has fmas and rearrange the ops if so.  */
		  if (direct_internal_fn_supported_p (IFN_FMA,
						      TREE_TYPE (lhs),
						      opt_type)
		      && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
		    {
		      mult_num = rank_ops_for_fma (&ops);
		    }

		  /* Only rewrite the expression tree to parallel in the
		     last reassoc pass to avoid useless work back-and-forth
		     with initial linearization.  */
		  bool has_fma = mult_num >= 2 && mult_num != ops_num;
		  if (!reassoc_insert_powi_p
		      && ops.length () > 3
		      && (width = get_reassociation_width (&ops, mult_num, lhs,
							   rhs_code, mode))
			   > 1)
		    {
		      if (dump_file && (dump_flags & TDF_DETAILS))
			fprintf (dump_file,
				 "Width = %d was chosen for reassociation\n",
				 width);
		      rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
						  width, has_fma, ops);
		    }
		  else
		    {
		      /* When there are three operands left, we want
			 to make sure the ones that get the double
			 binary op are chosen wisely.  */
		      int len = ops.length ();
		      if (len >= 3
			  && (!has_fma
			      /* width > 1 means ranking ops results in better
				 parallelism.  Check current value to avoid
				 calling get_reassociation_width again.  */
			      || (width != 1
				  && get_reassociation_width (
				       &ops, mult_num, lhs, rhs_code, mode)
				       > 1)))
			swap_ops_for_binary_stmt (ops, len - 3);

		      new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
						   powi_result != NULL
						   || negate_result,
						   len != orig_len);
		    }

		  /* If we combined some repeated factors into a
		     __builtin_powi call, multiply that result by the
		     reassociated operands.  */
		  if (powi_result)
		    {
		      gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
		      tree type = TREE_TYPE (lhs);
		      tree target_ssa = make_temp_ssa_name (type, NULL,
							    "reassocpow");
		      gimple_set_lhs (lhs_stmt, target_ssa);
		      update_stmt (lhs_stmt);
		      if (lhs != new_lhs)
			{
			  target_ssa = new_lhs;
			  new_lhs = lhs;
			}
		      mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
						      powi_result, target_ssa);
		      gimple_set_location (mul_stmt, gimple_location (stmt));
		      gimple_set_uid (mul_stmt, gimple_uid (stmt));
		      gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
		    }
		}

	      if (negate_result)
		{
		  stmt = SSA_NAME_DEF_STMT (lhs);
		  tree tmp = make_ssa_name (TREE_TYPE (lhs));
		  gimple_set_lhs (stmt, tmp);
		  if (lhs != new_lhs)
		    new_lhs = tmp;
		  gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
							   tmp);
		  gimple_set_uid (neg_stmt, gimple_uid (stmt));
		  gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
		  update_stmt (stmt);
		}
	    }
	}
    }

  return cfg_cleanup_needed;
}
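
/* For illustration only, of the constant rotation done for bit operations in
   reassociate_bb above: given _1 = x & y; _2 = _1 & 0x10; the operand vector
   ends with the INTEGER_CST 0x10, which is swapped to the front so the chain
   is rewritten roughly as (x & y) & 0x10 rather than (x & 0x10) & y; the
   former is more likely to match a single bit-test pattern at RTL time.  */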

/* Add jumps around shifts for range tests turned into bit tests.
   For each SSA_NAME VAR we have code like:
   VAR = ...;  // final stmt of range comparison
   // bit test here...;
   OTHERVAR = ...;  // final stmt of the bit test sequence
   RES = VAR | OTHERVAR;
   Turn the above into:
   VAR = ...;
   if (VAR != 0)
     goto <l3>;
   else
     goto <l2>;
   <l2>:
   // bit test here...;
   OTHERVAR = ...;
   <l3>:
   # RES = PHI<1(l1), OTHERVAR(l2)>;  */

static void
branch_fixup (void)
{
  tree var;
  unsigned int i;

  FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (var);
      gimple *use_stmt;
      use_operand_p use;
      bool ok = single_imm_use (var, &use, &use_stmt);
      gcc_assert (ok
		  && is_gimple_assign (use_stmt)
		  && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
		  && gimple_bb (def_stmt) == gimple_bb (use_stmt));

      basic_block cond_bb = gimple_bb (def_stmt);
      basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
      basic_block merge_bb = split_block (then_bb, use_stmt)->dest;

      gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
      gimple *g = gimple_build_cond (NE_EXPR, var,
				     build_zero_cst (TREE_TYPE (var)),
				     NULL_TREE, NULL_TREE);
      location_t loc = gimple_location (use_stmt);
      gimple_set_location (g, loc);
      gsi_insert_after (&gsi, g, GSI_NEW_STMT);

      edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
      etrue->probability = profile_probability::even ();
      edge efalse = find_edge (cond_bb, then_bb);
      efalse->flags = EDGE_FALSE_VALUE;
      efalse->probability -= etrue->probability;
      then_bb->count -= etrue->count ();

      tree othervar = NULL_TREE;
      if (gimple_assign_rhs1 (use_stmt) == var)
	othervar = gimple_assign_rhs2 (use_stmt);
      else if (gimple_assign_rhs2 (use_stmt) == var)
	othervar = gimple_assign_rhs1 (use_stmt);
      else
	gcc_unreachable ();
      tree lhs = gimple_assign_lhs (use_stmt);
      gphi *phi = create_phi_node (lhs, merge_bb);
      add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
      add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
      gsi = gsi_for_stmt (use_stmt);
      gsi_remove (&gsi, true);

      set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
      set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
    }
  reassoc_branch_fixups.release ();
}

void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
void debug_ops_vector (vec<operand_entry *> ops);

/* Dump the operand entry vector OPS to FILE.  */

void
dump_ops_vector (FILE *file, vec<operand_entry *> ops)
{
  operand_entry *oe;
  unsigned int i;

  FOR_EACH_VEC_ELT (ops, i, oe)
    {
      fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
      print_generic_expr (file, oe->op);
      fprintf (file, "\n");
    }
}

/* Dump the operand entry vector OPS to STDERR.  */

DEBUG_FUNCTION void
debug_ops_vector (vec<operand_entry *> ops)
{
  dump_ops_vector (stderr, ops);
}

/* Bubble up return status from reassociate_bb.  */

static bool
do_reassoc ()
{
  bool cfg_cleanup_needed = false;
  basic_block *worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));

  unsigned sp = 0;
  for (auto son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
       son; son = next_dom_son (CDI_DOMINATORS, son))
    worklist[sp++] = son;
  while (sp)
    {
      basic_block bb = worklist[--sp];
      break_up_subtract_bb (bb);
      for (auto son = first_dom_son (CDI_DOMINATORS, bb);
	   son; son = next_dom_son (CDI_DOMINATORS, son))
	worklist[sp++] = son;
    }

  for (auto son = first_dom_son (CDI_POST_DOMINATORS,
				 EXIT_BLOCK_PTR_FOR_FN (cfun));
       son; son = next_dom_son (CDI_POST_DOMINATORS, son))
    worklist[sp++] = son;
  while (sp)
    {
      basic_block bb = worklist[--sp];
      cfg_cleanup_needed |= reassociate_bb (bb);
      for (auto son = first_dom_son (CDI_POST_DOMINATORS, bb);
	   son; son = next_dom_son (CDI_POST_DOMINATORS, son))
	worklist[sp++] = son;
    }

  free (worklist);
  return cfg_cleanup_needed;
}

/* Initialize the reassociation pass.  */

static void
init_reassoc (void)
{
  int i;
  int64_t rank = 2;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  next_operand_entry_id = 0;

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
  operand_rank = new hash_map<tree, int64_t>;

  /* Give each default definition a distinct rank.  This includes
     parameters and the static chain.  Walk backwards over all
     SSA names so that we get proper rank ordering according
     to tree_swap_operands_p.  */
  for (i = num_ssa_names - 1; i > 0; --i)
    {
      tree name = ssa_name (i);
      if (name && SSA_NAME_IS_DEFAULT_DEF (name))
	insert_operand_rank (name, ++rank);
    }

  /* Set up rank for each BB  */
  for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  plus_negates = vNULL;
  mark_ssa_maybe_undefs ();
}

/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);
  statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
			    reassociate_stats.pows_encountered);
  statistics_counter_event (cfun, "Built-in powi calls created",
			    reassociate_stats.pows_created);

  delete operand_rank;
  bitmap_clear (biased_names);
  operand_entry_pool.release ();
  free (bb_rank);
  plus_negates.release ();
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}

/* Gate and execute functions for Reassociation.  If INSERT_POWI_P, enable
   insertion of __builtin_powi calls.

   Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
   optimization of a gimple conditional.  Otherwise returns zero.  */

static unsigned int
execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
{
  reassoc_insert_powi_p = insert_powi_p;
  reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;

  init_reassoc ();

  bool cfg_cleanup_needed;
  cfg_cleanup_needed = do_reassoc ();
  repropagate_negates ();
  branch_fixup ();

  fini_reassoc ();
  return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
}

namespace {

const pass_data pass_data_reassoc =
{
  GIMPLE_PASS, /* type */
  "reassoc", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_REASSOC, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa_only_virtuals, /* todo_flags_finish */
};

class pass_reassoc : public gimple_opt_pass
{
public:
  pass_reassoc (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
  {}

  /* opt_pass methods: */
  opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
  void set_pass_param (unsigned int n, bool param) final override
    {
      gcc_assert (n == 0);
      insert_powi_p = param;
      bias_loop_carried_phi_ranks_p = !param;
    }
  bool gate (function *) final override { return flag_tree_reassoc != 0; }
  unsigned int execute (function *) final override
  {
    return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
  }

 private:
  /* Enable insertion of __builtin_powi calls during execute_reassoc.  See
     point 3a in the pass header comment.  */
  bool insert_powi_p;
  bool bias_loop_carried_phi_ranks_p;
}; // class pass_reassoc

} // anon namespace

gimple_opt_pass *
make_pass_reassoc (gcc::context *ctxt)
{
  return new pass_reassoc (ctxt);
}