gcc/tree-ssa-reassoc.cc
1 /* Reassociation for trees.
2 Copyright (C) 2005-2025 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "target.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "cfghooks.h"
30 #include "alloc-pool.h"
31 #include "tree-pass.h"
32 #include "memmodel.h"
33 #include "tm_p.h"
34 #include "ssa.h"
35 #include "optabs-tree.h"
36 #include "gimple-pretty-print.h"
37 #include "diagnostic-core.h"
38 #include "fold-const.h"
39 #include "stor-layout.h"
40 #include "cfganal.h"
41 #include "gimple-iterator.h"
42 #include "gimple-fold.h"
43 #include "tree-eh.h"
44 #include "gimplify-me.h"
45 #include "tree-cfg.h"
46 #include "tree-ssa-loop.h"
47 #include "flags.h"
48 #include "tree-ssa.h"
49 #include "langhooks.h"
50 #include "cfgloop.h"
51 #include "builtins.h"
52 #include "gimplify.h"
53 #include "case-cfn-macros.h"
54 #include "tree-ssa-reassoc.h"
55 #include "tree-ssa-math-opts.h"
56 #include "gimple-range.h"
57 #include "internal-fn.h"
59 /* This is a simple global reassociation pass. It is, in part, based
60 on the LLVM pass of the same name (They do some things more/less
61 than we do, in different orders, etc).
63 It consists of five steps:
65 1. Breaking up subtract operations into addition + negate, where
66 it would promote the reassociation of adds.
68 2. Left linearization of the expression trees, so that (A+B)+(C+D)
69 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
70 During linearization, we place the operands of the binary
71 expressions into a vector of operand_entry_*
73 3. Optimization of the operand lists, eliminating things like a +
74 -a, a & a, etc.
76 3a. Combine repeated factors with the same occurrence counts
77 into a __builtin_powi call that will later be optimized into
78 an optimal number of multiplies.
80 4. Rewrite the expression trees we linearized and optimized so
81 they are in proper rank order.
83 5. Repropagate negates, as nothing else will clean it up ATM.
85 A bit of theory on #4, since nobody seems to write anything down
86 about why it makes sense to do it the way they do it:
88 We could do this much nicer theoretically, but don't (for reasons
89 explained after how to do it theoretically nice :P).
91 In order to promote the most redundancy elimination, you want
92 binary expressions whose operands are the same rank (or
93 preferably, the same value) exposed to the redundancy eliminator,
94 for possible elimination.
96 So the way to do this if we really cared, is to build the new op
97 tree from the leaves to the roots, merging as you go, and putting the
98 new op on the end of the worklist, until you are left with one
99 thing on the worklist.
101 IE if you have to rewrite the following set of operands (listed with
102 rank in parentheses), with opcode PLUS_EXPR:
104 a (1), b (1), c (1), d (2), e (2)
107 We start with our merge worklist empty, and the ops list with all of
108 those on it.
110 You want to first merge all leaves of the same rank, as much as
111 possible.
113 So first build a binary op of
115 mergetmp = a + b, and put "mergetmp" on the merge worklist.
117 Because there is no three operand form of PLUS_EXPR, c is not going to
118 be exposed to redundancy elimination as a rank 1 operand.
120 So you might as well throw it on the merge worklist (you could also
121 consider it to now be a rank two operand, and merge it with d and e,
122 but in this case, you then have evicted e from a binary op. So at
123 least in this situation, you can't win.)
125 Then build a binary op of d + e
126 mergetmp2 = d + e
128 and put mergetmp2 on the merge worklist.
130 so merge worklist = {mergetmp, c, mergetmp2}
132 Continue building binary ops of these operations until you have only
133 one operation left on the worklist.
135 So we have
137 build binary op
138 mergetmp3 = mergetmp + c
140 worklist = {mergetmp2, mergetmp3}
142 mergetmp4 = mergetmp2 + mergetmp3
144 worklist = {mergetmp4}
146 because we have one operation left, we can now just set the original
147 statement equal to the result of that operation.
149 This will at least expose a + b and d + e to redundancy elimination
150 as binary operations.
152 For extra points, you can reuse the old statements to build the
153 mergetmps, since you shouldn't run out.
155 So why don't we do this?
157 Because it's expensive, and rarely will help. Most trees we are
158 reassociating have 3 or fewer ops. If they have 2 ops, they already
159 will be written into a nice single binary op. If you have 3 ops, a
160 single simple check suffices to tell you whether the first two are of the
161 same rank. If so, you know to order it
163 mergetmp = op1 + op2
164 newstmt = mergetmp + op3
166 instead of
167 mergetmp = op2 + op3
168 newstmt = mergetmp + op1
170 If all three are of the same rank, you can't expose them all in a
171 single binary operator anyway, so the above is *still* the best you
172 can do.
174 Thus, this is what we do. When we have three ops left, we check to see
175 what order to put them in, and call it a day. As a nod to vector sum
176 reduction, we check if any of the ops are really a phi node that is a
177 destructive update for the associating op, and keep the destructive
178 update together for vector sum reduction recognition. */
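/* As a small worked example (a sketch, not an exhaustive description of
   the pass): given

     tmp1 = a + b;
     tmp2 = tmp1 - a;

   step 1 rewrites the subtraction as tmp2 = tmp1 + (-a), step 2
   linearizes the chain into the operand list {b, a, -a}, step 3
   eliminates the a + -a pair (see eliminate_plus_minus_pair below), and
   step 4 rewrites what is left so that tmp2 reduces to b.  This assumes
   a type for which reassociation is allowed at all, i.e. a wrapping
   integer type or a float type with -fassociative-math
   (see can_reassociate_type_p).  */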
180 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
181 point 3a in the pass header comment. */
182 static bool reassoc_insert_powi_p;
184 /* Enable biasing ranks of loop accumulators. We don't want this before
185 vectorization, since it interferes with reduction chains. */
186 static bool reassoc_bias_loop_carried_phi_ranks_p;
188 /* Statistics */
189 static struct
191 int linearized;
192 int constants_eliminated;
193 int ops_eliminated;
194 int rewritten;
195 int pows_encountered;
196 int pows_created;
197 } reassociate_stats;
200 static object_allocator<operand_entry> operand_entry_pool
201 ("operand entry pool");
203 /* This is used to assign a unique ID to each struct operand_entry
204 so that qsort results are identical on different hosts. */
205 static unsigned int next_operand_entry_id;
207 /* Starting rank number for a given basic block, so that we can rank
208 operations using unmovable instructions in that BB based on the bb
209 depth. */
210 static int64_t *bb_rank;
212 /* Operand->rank hashtable. */
213 static hash_map<tree, int64_t> *operand_rank;
215 /* SSA_NAMEs that are forms of loop accumulators and whose ranks need to be
216 biased. */
217 static auto_bitmap biased_names;
219 /* Vector of SSA_NAMEs for which, after reassociate_bb has processed
220 all basic blocks, the CFG should be adjusted: the basic block is
221 split right after that SSA_NAME's definition statement and before
222 its only use, which must be a bit ior. */
223 static vec<tree> reassoc_branch_fixups;
225 /* Forward decls. */
226 static int64_t get_rank (tree);
227 static bool reassoc_stmt_dominates_stmt_p (gimple *, gimple *);
229 /* Wrapper around gsi_remove, which adjusts gimple_uid of debug stmts
230 possibly added by gsi_remove. */
232 static bool
233 reassoc_remove_stmt (gimple_stmt_iterator *gsi)
235 gimple *stmt = gsi_stmt (*gsi);
237 if (!MAY_HAVE_DEBUG_BIND_STMTS || gimple_code (stmt) == GIMPLE_PHI)
238 return gsi_remove (gsi, true);
240 gimple_stmt_iterator prev = *gsi;
241 gsi_prev (&prev);
242 unsigned uid = gimple_uid (stmt);
243 basic_block bb = gimple_bb (stmt);
244 bool ret = gsi_remove (gsi, true);
245 if (!gsi_end_p (prev))
246 gsi_next (&prev);
247 else
248 prev = gsi_start_bb (bb);
249 gimple *end_stmt = gsi_stmt (*gsi);
250 while ((stmt = gsi_stmt (prev)) != end_stmt)
252 gcc_assert (stmt && is_gimple_debug (stmt) && gimple_uid (stmt) == 0);
253 gimple_set_uid (stmt, uid);
254 gsi_next (&prev);
256 return ret;
259 /* Bias amount for loop-carried phis. We want this to be larger than
260 the depth of any reassociation tree we can see, but not larger than
261 the rank difference between two blocks. */
262 #define PHI_LOOP_BIAS (1 << 15)
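/* Illustrative note: bb_rank values are spaced per basic block in units
   of 1 << 16 (compare the ">> 16" in sort_by_operand_rank and the rank
   setup in init_reassoc), so a bias of 1 << 15 is assumed to stay below
   the rank difference between two blocks while still exceeding the depth
   of any reassociation tree we expect to see.  */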
264 /* Return TRUE iff PHI_LOOP_BIAS should be propagated from one of the STMT's
265 operands to the STMT's left-hand side. The goal is to preserve bias in code
266 like this:
268 x_1 = phi(x_0, x_2)
269 a = x_1 | 1
270 b = a ^ 2
271 .MEM = b
272 c = b + d
273 x_2 = c + e
275 That is, we need to preserve bias along single-use chains originating from
276 loop-carried phis. Only GIMPLE_ASSIGNs to SSA_NAMEs are considered to be
277 uses, because only they participate in rank propagation. */
278 static bool
279 propagate_bias_p (gimple *stmt)
281 use_operand_p use;
282 imm_use_iterator use_iter;
283 gimple *single_use_stmt = NULL;
285 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_reference)
286 return false;
288 FOR_EACH_IMM_USE_FAST (use, use_iter, gimple_assign_lhs (stmt))
290 gimple *current_use_stmt = USE_STMT (use);
292 if (is_gimple_assign (current_use_stmt)
293 && TREE_CODE (gimple_assign_lhs (current_use_stmt)) == SSA_NAME)
295 if (single_use_stmt != NULL && single_use_stmt != current_use_stmt)
296 return false;
297 single_use_stmt = current_use_stmt;
301 if (single_use_stmt == NULL)
302 return false;
304 if (gimple_bb (stmt)->loop_father
305 != gimple_bb (single_use_stmt)->loop_father)
306 return false;
308 return true;
311 /* Rank assigned to a phi statement. If STMT is a loop-carried phi of
312 an innermost loop, and the phi has only a single use which is inside
313 the loop, then the rank is the block rank of the loop latch plus an
314 extra bias for the loop-carried dependence. This causes expressions
315 calculated into an accumulator variable to be independent for each
316 iteration of the loop. If STMT is some other phi, the rank is the
317 block rank of its containing block. */
318 static int64_t
319 phi_rank (gimple *stmt)
321 basic_block bb = gimple_bb (stmt);
322 class loop *father = bb->loop_father;
323 tree res;
324 unsigned i;
325 use_operand_p use;
326 gimple *use_stmt;
328 if (!reassoc_bias_loop_carried_phi_ranks_p)
329 return bb_rank[bb->index];
331 /* We only care about real loops (those with a latch). */
332 if (!father->latch)
333 return bb_rank[bb->index];
335 /* Interesting phis must be in headers of innermost loops. */
336 if (bb != father->header
337 || father->inner)
338 return bb_rank[bb->index];
340 /* Ignore virtual SSA_NAMEs. */
341 res = gimple_phi_result (stmt);
342 if (virtual_operand_p (res))
343 return bb_rank[bb->index];
345 /* The phi definition must have a single use, and that use must be
346 within the loop. Otherwise this isn't an accumulator pattern. */
347 if (!single_imm_use (res, &use, &use_stmt)
348 || gimple_bb (use_stmt)->loop_father != father)
349 return bb_rank[bb->index];
351 /* Look for phi arguments from within the loop. If found, bias this phi. */
352 for (i = 0; i < gimple_phi_num_args (stmt); i++)
354 tree arg = gimple_phi_arg_def (stmt, i);
355 if (TREE_CODE (arg) == SSA_NAME
356 && !SSA_NAME_IS_DEFAULT_DEF (arg))
358 gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
359 if (gimple_bb (def_stmt)->loop_father == father)
360 return bb_rank[father->latch->index] + PHI_LOOP_BIAS;
364 /* Must be an uninteresting phi. */
365 return bb_rank[bb->index];
368 /* Return the maximum of RANK and the rank that should be propagated
369 from expression OP. For most operands, this is just the rank of OP.
370 For loop-carried phis, the value is zero to avoid undoing the bias
371 in favor of the phi. */
372 static int64_t
373 propagate_rank (int64_t rank, tree op, bool *maybe_biased_p)
375 int64_t op_rank;
377 op_rank = get_rank (op);
379 /* Check whether op is biased after the get_rank () call, since it might have
380 updated biased_names. */
381 if (TREE_CODE (op) == SSA_NAME
382 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (op)))
384 if (maybe_biased_p == NULL)
385 return rank;
386 *maybe_biased_p = true;
389 return MAX (rank, op_rank);
392 /* Look up the operand rank structure for expression E. */
394 static inline int64_t
395 find_operand_rank (tree e)
397 int64_t *slot = operand_rank->get (e);
398 return slot ? *slot : -1;
401 /* Insert {E,RANK} into the operand rank hashtable. */
403 static inline void
404 insert_operand_rank (tree e, int64_t rank)
406 gcc_assert (rank > 0);
407 bool existed = operand_rank->put (e, rank);
408 gcc_assert (!existed);
411 /* Given an expression E, return the rank of the expression. */
413 static int64_t
414 get_rank (tree e)
416 /* SSA_NAME's have the rank of the expression they are the result of.
418 For globals and uninitialized values, the rank is 0.
419 For function arguments, use the pre-setup rank.
420 For PHI nodes, stores, asm statements, etc, we use the rank of
421 the BB.
422 For simple operations, the rank is the maximum rank of any of
423 its operands, or the bb_rank, whichever is less.
424 I make no claims that this is optimal, however, it gives good
425 results. */
427 /* We make an exception to the normal ranking system to break
428 dependences of accumulator variables in loops. Suppose we
429 have a simple one-block loop containing:
431 x_1 = phi(x_0, x_2)
432 b = a + x_1
433 c = b + d
434 x_2 = c + e
436 As shown, each iteration of the calculation into x is fully
437 dependent upon the iteration before it. We would prefer to
438 see this in the form:
440 x_1 = phi(x_0, x_2)
441 b = a + d
442 c = b + e
443 x_2 = c + x_1
445 If the loop is unrolled, the calculations of b and c from
446 different iterations can be interleaved.
448 To obtain this result during reassociation, we bias the rank
449 of the phi definition x_1 upward, when it is recognized as an
450 accumulator pattern. The artificial rank causes it to be
451 added last, providing the desired independence. */
453 if (TREE_CODE (e) == SSA_NAME)
455 ssa_op_iter iter;
456 gimple *stmt;
457 int64_t rank;
458 tree op;
460 /* If we already have a rank for this expression, use that. */
461 rank = find_operand_rank (e);
462 if (rank != -1)
463 return rank;
465 stmt = SSA_NAME_DEF_STMT (e);
466 if (gimple_code (stmt) == GIMPLE_PHI)
468 rank = phi_rank (stmt);
469 if (rank != bb_rank[gimple_bb (stmt)->index])
470 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
473 else if (!is_gimple_assign (stmt))
474 rank = bb_rank[gimple_bb (stmt)->index];
476 else
478 bool biased_p = false;
479 bool *maybe_biased_p = propagate_bias_p (stmt) ? &biased_p : NULL;
481 /* Otherwise, find the maximum rank for the operands. As an
482 exception, remove the bias from loop-carried phis when propagating
483 the rank so that dependent operations are not also biased. */
484 /* Simply walk over all SSA uses - this takes advantage of the
485 fact that non-SSA operands satisfy is_gimple_min_invariant and
486 thus have rank 0. */
487 rank = 0;
488 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
489 rank = propagate_rank (rank, op, maybe_biased_p);
491 rank += 1;
492 if (biased_p)
493 bitmap_set_bit (biased_names, SSA_NAME_VERSION (e));
496 if (dump_file && (dump_flags & TDF_DETAILS))
498 fprintf (dump_file, "Rank for ");
499 print_generic_expr (dump_file, e);
500 fprintf (dump_file, " is %" PRId64 "\n", rank);
503 /* Note the rank in the hashtable so we don't recompute it. */
504 insert_operand_rank (e, rank);
505 return rank;
508 /* Constants, globals, etc., are rank 0 */
509 return 0;
513 /* We want integer ones to end up last no matter what, since they are
514 the ones we can do the most with. */
515 #define INTEGER_CONST_TYPE 1 << 4
516 #define FLOAT_ONE_CONST_TYPE 1 << 3
517 #define FLOAT_CONST_TYPE 1 << 2
518 #define OTHER_CONST_TYPE 1 << 1
520 /* Classify an invariant tree into integer, float, or other, so that
521 we can sort them to be near other constants of the same type. */
522 static inline int
523 constant_type (tree t)
525 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
526 return INTEGER_CONST_TYPE;
527 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
529 /* Sort -1.0 and 1.0 constants last: while in some cases
530 const_binop can't optimize some inexact operations, multiplication
531 by -1.0 or 1.0 can always be merged with others. */
532 if (real_onep (t) || real_minus_onep (t))
533 return FLOAT_ONE_CONST_TYPE;
534 return FLOAT_CONST_TYPE;
536 else
537 return OTHER_CONST_TYPE;
540 /* qsort comparison function to sort operand entries PA and PB by rank
541 so that the sorted array is ordered by rank in decreasing order. */
542 static int
543 sort_by_operand_rank (const void *pa, const void *pb)
545 const operand_entry *oea = *(const operand_entry *const *)pa;
546 const operand_entry *oeb = *(const operand_entry *const *)pb;
548 if (oeb->rank != oea->rank)
549 return oeb->rank > oea->rank ? 1 : -1;
551 /* It's nicer for optimize_expression if constants that are likely
552 to fold when added/multiplied/whatever are put next to each
553 other. Since all constants have rank 0, order them by type. */
554 if (oea->rank == 0)
556 if (constant_type (oeb->op) != constant_type (oea->op))
557 return constant_type (oea->op) - constant_type (oeb->op);
558 else
559 /* To make sorting result stable, we use unique IDs to determine
560 order. */
561 return oeb->id > oea->id ? 1 : -1;
564 if (TREE_CODE (oea->op) != SSA_NAME)
566 if (TREE_CODE (oeb->op) != SSA_NAME)
567 return oeb->id > oea->id ? 1 : -1;
568 else
569 return 1;
571 else if (TREE_CODE (oeb->op) != SSA_NAME)
572 return -1;
574 /* Lastly, make sure the versions that are the same go next to each
575 other. */
576 if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op))
578 /* SSA_NAME_VERSION is assigned pretty randomly, because we reuse
579 versions of removed SSA_NAMEs, so if possible prefer to sort
580 based on basic block and gimple_uid of the SSA_NAME_DEF_STMT.
581 See PR60418. */
582 gimple *stmta = SSA_NAME_DEF_STMT (oea->op);
583 gimple *stmtb = SSA_NAME_DEF_STMT (oeb->op);
584 basic_block bba = gimple_bb (stmta);
585 basic_block bbb = gimple_bb (stmtb);
586 if (bbb != bba)
588 /* One of the SSA_NAMEs can be defined in oeN->stmt_to_insert
589 but the other might not be. */
590 if (!bba)
591 return 1;
592 if (!bbb)
593 return -1;
594 /* If neither is, compare bb_rank. */
595 if (bb_rank[bbb->index] != bb_rank[bba->index])
596 return (bb_rank[bbb->index] >> 16) - (bb_rank[bba->index] >> 16);
599 bool da = reassoc_stmt_dominates_stmt_p (stmta, stmtb);
600 bool db = reassoc_stmt_dominates_stmt_p (stmtb, stmta);
601 if (da != db)
602 return da ? 1 : -1;
604 return SSA_NAME_VERSION (oeb->op) > SSA_NAME_VERSION (oea->op) ? 1 : -1;
607 return oeb->id > oea->id ? 1 : -1;
610 /* Add an operand entry to *OPS for the tree operand OP. */
612 static void
613 add_to_ops_vec (vec<operand_entry *> *ops, tree op, gimple *stmt_to_insert = NULL)
615 operand_entry *oe = operand_entry_pool.allocate ();
617 oe->op = op;
618 oe->rank = get_rank (op);
619 oe->id = next_operand_entry_id++;
620 oe->count = 1;
621 oe->stmt_to_insert = stmt_to_insert;
622 ops->safe_push (oe);
625 /* Add an operand entry to *OPS for the tree operand OP with repeat
626 count REPEAT. */
628 static void
629 add_repeat_to_ops_vec (vec<operand_entry *> *ops, tree op,
630 HOST_WIDE_INT repeat)
632 operand_entry *oe = operand_entry_pool.allocate ();
634 oe->op = op;
635 oe->rank = get_rank (op);
636 oe->id = next_operand_entry_id++;
637 oe->count = repeat;
638 oe->stmt_to_insert = NULL;
639 ops->safe_push (oe);
641 reassociate_stats.pows_encountered++;
644 /* Returns true if we can associate the SSA def OP. */
646 static bool
647 can_reassociate_op_p (tree op)
649 if (TREE_CODE (op) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (op))
650 return false;
651 /* Uninitialized variables can't participate in reassociation. */
652 if (TREE_CODE (op) == SSA_NAME && ssa_name_maybe_undef_p (op))
653 return false;
654 /* Make sure asm goto outputs do not participate in reassociation since
655 we have no way to find an insertion place after asm goto. */
656 if (TREE_CODE (op) == SSA_NAME
657 && gimple_code (SSA_NAME_DEF_STMT (op)) == GIMPLE_ASM
658 && gimple_asm_nlabels (as_a <gasm *> (SSA_NAME_DEF_STMT (op))) != 0)
659 return false;
660 return true;
663 /* Returns true if we can reassociate operations of TYPE.
664 That is for integral or non-saturating fixed-point types, and for
665 floating point type when associative-math is enabled. */
667 static bool
668 can_reassociate_type_p (tree type)
670 if ((ANY_INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
671 || NON_SAT_FIXED_POINT_TYPE_P (type)
672 || (flag_associative_math && FLOAT_TYPE_P (type)))
673 return true;
674 return false;
677 /* Return true if STMT is a reassociable operation containing a binary
678 operation with tree code CODE, and is inside LOOP. */
680 static bool
681 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
683 basic_block bb = gimple_bb (stmt);
685 if (gimple_bb (stmt) == NULL)
686 return false;
688 if (!flow_bb_inside_loop_p (loop, bb))
689 return false;
691 if (is_gimple_assign (stmt)
692 && gimple_assign_rhs_code (stmt) == code
693 && has_single_use (gimple_assign_lhs (stmt)))
695 tree rhs1 = gimple_assign_rhs1 (stmt);
696 tree rhs2 = gimple_assign_rhs2 (stmt);
697 if (!can_reassociate_op_p (rhs1)
698 || (rhs2 && !can_reassociate_op_p (rhs2)))
699 return false;
700 return true;
703 return false;
707 /* Return true if STMT is a nop-conversion. */
709 static bool
710 gimple_nop_conversion_p (gimple *stmt)
712 if (gassign *ass = dyn_cast <gassign *> (stmt))
714 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (ass))
715 && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (ass)),
716 TREE_TYPE (gimple_assign_rhs1 (ass))))
717 return true;
719 return false;
722 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
723 operand of that unary operation. Otherwise, return NULL. */
725 static tree
726 get_unary_op (tree name, enum tree_code opcode)
728 gimple *stmt = SSA_NAME_DEF_STMT (name);
730 /* Look through nop conversions (sign changes). */
731 if (gimple_nop_conversion_p (stmt)
732 && TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME)
733 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
735 if (!is_gimple_assign (stmt))
736 return NULL_TREE;
738 if (gimple_assign_rhs_code (stmt) == opcode)
739 return gimple_assign_rhs1 (stmt);
740 return NULL_TREE;
743 /* Return true if OP1 and OP2 have the same value if cast to either type. */
745 static bool
746 ops_equal_values_p (tree op1, tree op2)
748 if (op1 == op2)
749 return true;
751 tree orig_op1 = op1;
752 if (TREE_CODE (op1) == SSA_NAME)
754 gimple *stmt = SSA_NAME_DEF_STMT (op1);
755 if (gimple_nop_conversion_p (stmt))
757 op1 = gimple_assign_rhs1 (stmt);
758 if (op1 == op2)
759 return true;
763 if (TREE_CODE (op2) == SSA_NAME)
765 gimple *stmt = SSA_NAME_DEF_STMT (op2);
766 if (gimple_nop_conversion_p (stmt))
768 op2 = gimple_assign_rhs1 (stmt);
769 if (op1 == op2
770 || orig_op1 == op2)
771 return true;
775 return false;
779 /* If CURR and LAST are a pair of ops that OPCODE allows us to
780 eliminate through equivalences, do so, remove them from OPS, and
781 return true. Otherwise, return false. */
783 static bool
784 eliminate_duplicate_pair (enum tree_code opcode,
785 vec<operand_entry *> *ops,
786 bool *all_done,
787 unsigned int i,
788 operand_entry *curr,
789 operand_entry *last)
792 /* If we have two of the same op, and the opcode is &, |, min, or max,
793 we can eliminate one of them.
794 If we have two of the same op, and the opcode is ^, we can
795 eliminate both of them. */
797 if (last && last->op == curr->op)
799 switch (opcode)
801 case MAX_EXPR:
802 case MIN_EXPR:
803 case BIT_IOR_EXPR:
804 case BIT_AND_EXPR:
805 if (dump_file && (dump_flags & TDF_DETAILS))
807 fprintf (dump_file, "Equivalence: ");
808 print_generic_expr (dump_file, curr->op);
809 fprintf (dump_file, " [&|minmax] ");
810 print_generic_expr (dump_file, last->op);
811 fprintf (dump_file, " -> ");
812 print_generic_stmt (dump_file, last->op);
815 ops->ordered_remove (i);
816 reassociate_stats.ops_eliminated ++;
818 return true;
820 case BIT_XOR_EXPR:
821 if (dump_file && (dump_flags & TDF_DETAILS))
823 fprintf (dump_file, "Equivalence: ");
824 print_generic_expr (dump_file, curr->op);
825 fprintf (dump_file, " ^ ");
826 print_generic_expr (dump_file, last->op);
827 fprintf (dump_file, " -> nothing\n");
830 reassociate_stats.ops_eliminated += 2;
832 if (ops->length () == 2)
834 ops->truncate (0);
835 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op)));
836 *all_done = true;
838 else
840 ops->ordered_remove (i-1);
841 ops->ordered_remove (i-1);
844 return true;
846 default:
847 break;
850 return false;
853 static vec<tree> plus_negates;
855 /* If OPCODE is PLUS_EXPR, CURR->OP is a negate expression or a bitwise not
856 expression, look in OPS for a corresponding positive operation to cancel
857 it out. If we find one, remove the other from OPS, replace
858 OPS[CURRINDEX] with 0 or -1, respectively, and return true. Otherwise,
859 return false. */
861 static bool
862 eliminate_plus_minus_pair (enum tree_code opcode,
863 vec<operand_entry *> *ops,
864 unsigned int currindex,
865 operand_entry *curr)
867 tree negateop;
868 tree notop;
869 unsigned int i;
870 operand_entry *oe;
872 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
873 return false;
875 negateop = get_unary_op (curr->op, NEGATE_EXPR);
876 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
877 if (negateop == NULL_TREE && notop == NULL_TREE)
878 return false;
880 /* Any non-negated version will have a rank that is one less than
881 the current rank. So once we hit those ranks, if we don't find
882 one, we can stop. */
884 for (i = currindex + 1;
885 ops->iterate (i, &oe)
886 && oe->rank >= curr->rank - 1 ;
887 i++)
889 if (negateop
890 && ops_equal_values_p (oe->op, negateop))
892 if (dump_file && (dump_flags & TDF_DETAILS))
894 fprintf (dump_file, "Equivalence: ");
895 print_generic_expr (dump_file, negateop);
896 fprintf (dump_file, " + -");
897 print_generic_expr (dump_file, oe->op);
898 fprintf (dump_file, " -> 0\n");
901 ops->ordered_remove (i);
902 add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op)));
903 ops->ordered_remove (currindex);
904 reassociate_stats.ops_eliminated ++;
906 return true;
908 else if (notop
909 && ops_equal_values_p (oe->op, notop))
911 tree op_type = TREE_TYPE (oe->op);
913 if (dump_file && (dump_flags & TDF_DETAILS))
915 fprintf (dump_file, "Equivalence: ");
916 print_generic_expr (dump_file, notop);
917 fprintf (dump_file, " + ~");
918 print_generic_expr (dump_file, oe->op);
919 fprintf (dump_file, " -> -1\n");
922 ops->ordered_remove (i);
923 add_to_ops_vec (ops, build_all_ones_cst (op_type));
924 ops->ordered_remove (currindex);
925 reassociate_stats.ops_eliminated ++;
927 return true;
931 /* If CURR->OP is a negate expr without nop conversion in a plus expr:
932 save it for later inspection in repropagate_negates(). */
933 if (negateop != NULL_TREE
934 && gimple_assign_rhs_code (SSA_NAME_DEF_STMT (curr->op)) == NEGATE_EXPR)
935 plus_negates.safe_push (curr->op);
937 return false;
940 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a
941 bitwise not expression, look in OPS for a corresponding operand to
942 cancel it out. If we find one, remove the other from OPS, replace
943 OPS[CURRINDEX] with 0, and return true. Otherwise, return
944 false. */
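/* For example: with OPS {x, ~x, y} and OPCODE BIT_AND_EXPR the whole
   chain folds to 0 (x & ~x is the absorbing element of &), and with
   BIT_IOR_EXPR it folds to -1; this is why the function below replaces
   the entire operand list with that single constant.  */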
946 static bool
947 eliminate_not_pairs (enum tree_code opcode,
948 vec<operand_entry *> *ops,
949 unsigned int currindex,
950 operand_entry *curr)
952 tree notop;
953 unsigned int i;
954 operand_entry *oe;
956 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
957 || TREE_CODE (curr->op) != SSA_NAME)
958 return false;
960 notop = get_unary_op (curr->op, BIT_NOT_EXPR);
961 if (notop == NULL_TREE)
962 return false;
964 /* Any non-not version will have a rank that is one less than
965 the current rank. So once we hit those ranks, if we don't find
966 one, we can stop. */
968 for (i = currindex + 1;
969 ops->iterate (i, &oe)
970 && oe->rank >= curr->rank - 1;
971 i++)
973 if (oe->op == notop)
975 if (dump_file && (dump_flags & TDF_DETAILS))
977 fprintf (dump_file, "Equivalence: ");
978 print_generic_expr (dump_file, notop);
979 if (opcode == BIT_AND_EXPR)
980 fprintf (dump_file, " & ~");
981 else if (opcode == BIT_IOR_EXPR)
982 fprintf (dump_file, " | ~");
983 print_generic_expr (dump_file, oe->op);
984 if (opcode == BIT_AND_EXPR)
985 fprintf (dump_file, " -> 0\n");
986 else if (opcode == BIT_IOR_EXPR)
987 fprintf (dump_file, " -> -1\n");
990 if (opcode == BIT_AND_EXPR)
991 oe->op = build_zero_cst (TREE_TYPE (oe->op));
992 else if (opcode == BIT_IOR_EXPR)
993 oe->op = build_all_ones_cst (TREE_TYPE (oe->op));
995 reassociate_stats.ops_eliminated += ops->length () - 1;
996 ops->truncate (0);
997 ops->quick_push (oe);
998 return true;
1002 return false;
1005 /* Use constant value that may be present in OPS to try to eliminate
1006 operands. Note that this function is only really used when we've
1007 eliminated ops for other reasons, or merged constants. Across
1008 single statements, fold already does all of this, plus more. There
1009 is little point in duplicating logic, so I've only included the
1010 identities that I could ever construct testcases to trigger. */
1012 static void
1013 eliminate_using_constants (enum tree_code opcode,
1014 vec<operand_entry *> *ops)
1016 operand_entry *oelast = ops->last ();
1017 tree type = TREE_TYPE (oelast->op);
1019 if (oelast->rank == 0
1020 && (ANY_INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
1022 switch (opcode)
1024 case BIT_AND_EXPR:
1025 if (integer_zerop (oelast->op))
1027 if (ops->length () != 1)
1029 if (dump_file && (dump_flags & TDF_DETAILS))
1030 fprintf (dump_file, "Found & 0, removing all other ops\n");
1032 reassociate_stats.ops_eliminated += ops->length () - 1;
1034 ops->truncate (0);
1035 ops->quick_push (oelast);
1036 return;
1039 else if (integer_all_onesp (oelast->op))
1041 if (ops->length () != 1)
1043 if (dump_file && (dump_flags & TDF_DETAILS))
1044 fprintf (dump_file, "Found & -1, removing\n");
1045 ops->pop ();
1046 reassociate_stats.ops_eliminated++;
1049 break;
1050 case BIT_IOR_EXPR:
1051 if (integer_all_onesp (oelast->op))
1053 if (ops->length () != 1)
1055 if (dump_file && (dump_flags & TDF_DETAILS))
1056 fprintf (dump_file, "Found | -1, removing all other ops\n");
1058 reassociate_stats.ops_eliminated += ops->length () - 1;
1060 ops->truncate (0);
1061 ops->quick_push (oelast);
1062 return;
1065 else if (integer_zerop (oelast->op))
1067 if (ops->length () != 1)
1069 if (dump_file && (dump_flags & TDF_DETAILS))
1070 fprintf (dump_file, "Found | 0, removing\n");
1071 ops->pop ();
1072 reassociate_stats.ops_eliminated++;
1075 break;
1076 case MULT_EXPR:
1077 if (integer_zerop (oelast->op)
1078 || (FLOAT_TYPE_P (type)
1079 && !HONOR_NANS (type)
1080 && !HONOR_SIGNED_ZEROS (type)
1081 && real_zerop (oelast->op)))
1083 if (ops->length () != 1)
1085 if (dump_file && (dump_flags & TDF_DETAILS))
1086 fprintf (dump_file, "Found * 0, removing all other ops\n");
1088 reassociate_stats.ops_eliminated += ops->length () - 1;
1089 ops->truncate (0);
1090 ops->quick_push (oelast);
1091 return;
1094 else if (integer_onep (oelast->op)
1095 || (FLOAT_TYPE_P (type)
1096 && !HONOR_SNANS (type)
1097 && real_onep (oelast->op)))
1099 if (ops->length () != 1)
1101 if (dump_file && (dump_flags & TDF_DETAILS))
1102 fprintf (dump_file, "Found * 1, removing\n");
1103 ops->pop ();
1104 reassociate_stats.ops_eliminated++;
1105 return;
1108 break;
1109 case BIT_XOR_EXPR:
1110 case PLUS_EXPR:
1111 case MINUS_EXPR:
1112 if (integer_zerop (oelast->op)
1113 || (FLOAT_TYPE_P (type)
1114 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
1115 && fold_real_zero_addition_p (type, 0, oelast->op,
1116 opcode == MINUS_EXPR)))
1118 if (ops->length () != 1)
1120 if (dump_file && (dump_flags & TDF_DETAILS))
1121 fprintf (dump_file, "Found [|^+] 0, removing\n");
1122 ops->pop ();
1123 reassociate_stats.ops_eliminated++;
1124 return;
1127 break;
1128 default:
1129 break;
1135 static void linearize_expr_tree (vec<operand_entry *> *, gimple *,
1136 bool, bool);
1138 /* Structure for tracking and counting operands. */
1139 struct oecount {
1140 unsigned int cnt;
1141 unsigned int id;
1142 enum tree_code oecode;
1143 tree op;
1147 /* The heap for the oecount hashtable and the sorted list of operands. */
1148 static vec<oecount> cvec;
1151 /* Oecount hashtable helpers. */
1153 struct oecount_hasher : int_hash <int, 0, 1>
1155 static inline hashval_t hash (int);
1156 static inline bool equal (int, int);
1159 /* Hash function for oecount. */
1161 inline hashval_t
1162 oecount_hasher::hash (int p)
1164 const oecount *c = &cvec[p - 42];
1165 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
1168 /* Comparison function for oecount. */
1170 inline bool
1171 oecount_hasher::equal (int p1, int p2)
1173 const oecount *c1 = &cvec[p1 - 42];
1174 const oecount *c2 = &cvec[p2 - 42];
1175 return c1->oecode == c2->oecode && c1->op == c2->op;
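/* Note on the encoding used above: the ints stored in the hash table are
   positions in CVEC biased by 42 (undistribute_ops_list pushes an element
   and then uses idx = cvec.length () + 41, and both hash and equal index
   with "- 42").  The bias keeps the stored values away from 0 and 1,
   which int_hash <int, 0, 1> reserves for empty and deleted slots.  */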
1178 /* Comparison function for qsort sorting oecount elements by count. */
1180 static int
1181 oecount_cmp (const void *p1, const void *p2)
1183 const oecount *c1 = (const oecount *)p1;
1184 const oecount *c2 = (const oecount *)p2;
1185 if (c1->cnt != c2->cnt)
1186 return c1->cnt > c2->cnt ? 1 : -1;
1187 else
1188 /* If counts are identical, use unique IDs to stabilize qsort. */
1189 return c1->id > c2->id ? 1 : -1;
1192 /* Return TRUE iff STMT represents a builtin call that raises OP
1193 to some exponent. */
1195 static bool
1196 stmt_is_power_of_op (gimple *stmt, tree op)
1198 if (!is_gimple_call (stmt))
1199 return false;
1201 switch (gimple_call_combined_fn (stmt))
1203 CASE_CFN_POW:
1204 CASE_CFN_POWI:
1205 return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0));
1207 default:
1208 return false;
1212 /* Given STMT which is a __builtin_pow* call, decrement its exponent
1213 in place and return the result. Assumes that stmt_is_power_of_op
1214 was previously called for STMT and returned TRUE. */
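/* For example, for __builtin_pow (x, 3.0) or __builtin_powi (x, 3) the
   call is adjusted in place to use exponent 2 and the function returns 2.  */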
1216 static HOST_WIDE_INT
1217 decrement_power (gimple *stmt)
1219 REAL_VALUE_TYPE c, cint;
1220 HOST_WIDE_INT power;
1221 tree arg1;
1223 switch (gimple_call_combined_fn (stmt))
1225 CASE_CFN_POW:
1226 arg1 = gimple_call_arg (stmt, 1);
1227 c = TREE_REAL_CST (arg1);
1228 power = real_to_integer (&c) - 1;
1229 real_from_integer (&cint, VOIDmode, power, SIGNED);
1230 gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint));
1231 return power;
1233 CASE_CFN_POWI:
1234 arg1 = gimple_call_arg (stmt, 1);
1235 power = TREE_INT_CST_LOW (arg1) - 1;
1236 gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power));
1237 return power;
1239 default:
1240 gcc_unreachable ();
1244 /* Replace the SSA name defined by STMT with a new SSA name and replace
1245 all uses of the old one with the new one. Return the new SSA name. */
1247 static tree
1248 make_new_ssa_for_def (gimple *stmt, enum tree_code opcode, tree op)
1250 gimple *use_stmt;
1251 use_operand_p use;
1252 imm_use_iterator iter;
1253 tree new_lhs, new_debug_lhs = NULL_TREE;
1254 tree lhs = gimple_get_lhs (stmt);
1256 new_lhs = make_ssa_name (TREE_TYPE (lhs));
1257 gimple_set_lhs (stmt, new_lhs);
1259 /* Also need to update GIMPLE_DEBUGs. */
1260 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
1262 tree repl = new_lhs;
1263 if (is_gimple_debug (use_stmt))
1265 if (new_debug_lhs == NULL_TREE)
1267 new_debug_lhs = build_debug_expr_decl (TREE_TYPE (lhs));
1268 gdebug *def_temp
1269 = gimple_build_debug_bind (new_debug_lhs,
1270 build2 (opcode, TREE_TYPE (lhs),
1271 new_lhs, op),
1272 stmt);
1273 gimple_set_uid (def_temp, gimple_uid (stmt));
1274 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1275 gsi_insert_after (&gsi, def_temp, GSI_SAME_STMT);
1277 repl = new_debug_lhs;
1279 FOR_EACH_IMM_USE_ON_STMT (use, iter)
1280 SET_USE (use, repl);
1281 update_stmt (use_stmt);
1283 return new_lhs;
1286 /* Replace all SSA names defined in STMTS_TO_FIX and replace their
1287 uses with new SSA names. Also do this for the stmt that defines DEF
1288 if *DEF is not OP. */
1290 static void
1291 make_new_ssa_for_all_defs (tree *def, enum tree_code opcode, tree op,
1292 vec<gimple *> &stmts_to_fix)
1294 unsigned i;
1295 gimple *stmt;
1297 if (*def != op
1298 && TREE_CODE (*def) == SSA_NAME
1299 && (stmt = SSA_NAME_DEF_STMT (*def))
1300 && gimple_code (stmt) != GIMPLE_NOP)
1301 *def = make_new_ssa_for_def (stmt, opcode, op);
1303 FOR_EACH_VEC_ELT (stmts_to_fix, i, stmt)
1304 make_new_ssa_for_def (stmt, opcode, op);
1307 /* Find the single immediate use of STMT's LHS, and replace it
1308 with OP. Remove STMT. If STMT's LHS is the same as *DEF,
1309 replace *DEF with OP as well. */
1311 static void
1312 propagate_op_to_single_use (tree op, gimple *stmt, tree *def)
1314 tree lhs;
1315 gimple *use_stmt;
1316 use_operand_p use;
1317 gimple_stmt_iterator gsi;
1319 if (is_gimple_call (stmt))
1320 lhs = gimple_call_lhs (stmt);
1321 else
1322 lhs = gimple_assign_lhs (stmt);
1324 gcc_assert (has_single_use (lhs));
1325 single_imm_use (lhs, &use, &use_stmt);
1326 if (lhs == *def)
1327 *def = op;
1328 SET_USE (use, op);
1329 if (TREE_CODE (op) != SSA_NAME)
1330 update_stmt (use_stmt);
1331 gsi = gsi_for_stmt (stmt);
1332 unlink_stmt_vdef (stmt);
1333 reassoc_remove_stmt (&gsi);
1334 release_defs (stmt);
1337 /* Walks the linear chain with result *DEF searching for an operation
1338 with operand OP and code OPCODE removing that from the chain. *DEF
1339 is updated if there is only one operand but no operation left. */
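/* A small illustration (hypothetical names): with the chain

     t1 = a * b;
     t2 = t1 * c;

   and *DEF == t2, OPCODE == MULT_EXPR, OP == b, the multiplication by b
   is removed, t1's remaining operand is propagated into its single use,
   and *DEF ends up (via a fresh SSA name, see the PR72835 note below)
   as the result of a * c.  */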
1341 static void
1342 zero_one_operation (tree *def, enum tree_code opcode, tree op)
1344 tree orig_def = *def;
1345 gimple *stmt = SSA_NAME_DEF_STMT (*def);
1346 /* PR72835 - Record the stmt chain that has to be updated such that
1347 we don't use the same LHS when the values computed are different. */
1348 auto_vec<gimple *, 64> stmts_to_fix;
1352 tree name;
1354 if (opcode == MULT_EXPR)
1356 if (stmt_is_power_of_op (stmt, op))
1358 if (decrement_power (stmt) == 1)
1360 if (stmts_to_fix.length () > 0)
1361 stmts_to_fix.pop ();
1362 propagate_op_to_single_use (op, stmt, def);
1364 break;
1366 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR)
1368 if (gimple_assign_rhs1 (stmt) == op)
1370 tree cst = build_minus_one_cst (TREE_TYPE (op));
1371 if (stmts_to_fix.length () > 0)
1372 stmts_to_fix.pop ();
1373 propagate_op_to_single_use (cst, stmt, def);
1374 break;
1376 else if (integer_minus_onep (op)
1377 || real_minus_onep (op))
1379 gimple_assign_set_rhs_code
1380 (stmt, TREE_CODE (gimple_assign_rhs1 (stmt)));
1381 break;
1386 name = gimple_assign_rhs1 (stmt);
1388 /* If this is the operation we look for and one of the operands
1389 is ours simply propagate the other operand into the stmts
1390 single use. */
1391 if (gimple_assign_rhs_code (stmt) == opcode
1392 && (name == op
1393 || gimple_assign_rhs2 (stmt) == op))
1395 if (name == op)
1396 name = gimple_assign_rhs2 (stmt);
1397 if (stmts_to_fix.length () > 0)
1398 stmts_to_fix.pop ();
1399 propagate_op_to_single_use (name, stmt, def);
1400 break;
1403 /* We might have a multiply of two __builtin_pow* calls, and
1404 the operand might be hiding in the rightmost one. Likewise
1405 this can happen for a negate. */
1406 if (opcode == MULT_EXPR
1407 && gimple_assign_rhs_code (stmt) == opcode
1408 && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME
1409 && has_single_use (gimple_assign_rhs2 (stmt)))
1411 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
1412 if (stmt_is_power_of_op (stmt2, op))
1414 if (decrement_power (stmt2) == 1)
1415 propagate_op_to_single_use (op, stmt2, def);
1416 else
1417 stmts_to_fix.safe_push (stmt2);
1418 break;
1420 else if (is_gimple_assign (stmt2)
1421 && gimple_assign_rhs_code (stmt2) == NEGATE_EXPR)
1423 if (gimple_assign_rhs1 (stmt2) == op)
1425 tree cst = build_minus_one_cst (TREE_TYPE (op));
1426 propagate_op_to_single_use (cst, stmt2, def);
1427 break;
1429 else if (integer_minus_onep (op)
1430 || real_minus_onep (op))
1432 stmts_to_fix.safe_push (stmt2);
1433 gimple_assign_set_rhs_code
1434 (stmt2, TREE_CODE (gimple_assign_rhs1 (stmt2)));
1435 break;
1440 /* Continue walking the chain. */
1441 gcc_assert (name != op
1442 && TREE_CODE (name) == SSA_NAME);
1443 stmt = SSA_NAME_DEF_STMT (name);
1444 stmts_to_fix.safe_push (stmt);
1446 while (1);
1448 if (stmts_to_fix.length () > 0 || *def == orig_def)
1449 make_new_ssa_for_all_defs (def, opcode, op, stmts_to_fix);
1452 /* Returns true if statement S1 dominates statement S2. Like
1453 stmt_dominates_stmt_p, but uses stmt UIDs to optimize. */
1455 static bool
1456 reassoc_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
1458 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1460 /* If bb1 is NULL, it should be a GIMPLE_NOP def stmt of an (D)
1461 SSA_NAME. Assume it lives at the beginning of function and
1462 thus dominates everything. */
1463 if (!bb1 || s1 == s2)
1464 return true;
1466 /* If bb2 is NULL, it doesn't dominate any stmt with a bb. */
1467 if (!bb2)
1468 return false;
1470 if (bb1 == bb2)
1472 /* PHIs in the same basic block are assumed to be
1473 executed all in parallel, if only one stmt is a PHI,
1474 it dominates the other stmt in the same basic block. */
1475 if (gimple_code (s1) == GIMPLE_PHI)
1476 return true;
1478 if (gimple_code (s2) == GIMPLE_PHI)
1479 return false;
1481 gcc_assert (gimple_uid (s1) && gimple_uid (s2));
1483 if (gimple_uid (s1) < gimple_uid (s2))
1484 return true;
1486 if (gimple_uid (s1) > gimple_uid (s2))
1487 return false;
1489 gimple_stmt_iterator gsi = gsi_for_stmt (s1);
1490 unsigned int uid = gimple_uid (s1);
1491 for (gsi_next (&gsi); !gsi_end_p (gsi); gsi_next (&gsi))
1493 gimple *s = gsi_stmt (gsi);
1494 if (gimple_uid (s) != uid)
1495 break;
1496 if (s == s2)
1497 return true;
1500 return false;
1503 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
1506 /* Insert STMT after INSERT_POINT. */
1508 static void
1509 insert_stmt_after (gimple *stmt, gimple *insert_point)
1511 gimple_stmt_iterator gsi;
1512 basic_block bb;
1514 if (gimple_code (insert_point) == GIMPLE_PHI)
1515 bb = gimple_bb (insert_point);
1516 else if (!stmt_ends_bb_p (insert_point))
1518 gsi = gsi_for_stmt (insert_point);
1519 gimple_set_uid (stmt, gimple_uid (insert_point));
1520 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1521 return;
1523 else if (gimple_code (insert_point) == GIMPLE_ASM
1524 && gimple_asm_nlabels (as_a <gasm *> (insert_point)) != 0)
1525 /* We have no idea where to insert - it depends on where the
1526 uses will be placed. */
1527 gcc_unreachable ();
1528 else
1529 /* We assume INSERT_POINT is a SSA_NAME_DEF_STMT of some SSA_NAME,
1530 thus if it must end a basic block, it should be a call that can
1531 throw, or some assignment that can throw. If it throws, the LHS
1532 of it will not be initialized though, so only valid places using
1533 the SSA_NAME should be dominated by the fallthru edge. */
1534 bb = find_fallthru_edge (gimple_bb (insert_point)->succs)->dest;
1535 gsi = gsi_after_labels (bb);
1536 if (gsi_end_p (gsi))
1538 gimple_stmt_iterator gsi2 = gsi_last_bb (bb);
1539 gimple_set_uid (stmt,
1540 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1542 else
1543 gimple_set_uid (stmt, gimple_uid (gsi_stmt (gsi)));
1544 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1547 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
1548 the result. Places the statement after the definition of either
1549 OP1 or OP2. Returns the new statement. */
1551 static gimple *
1552 build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode)
1554 gimple *op1def = NULL, *op2def = NULL;
1555 gimple_stmt_iterator gsi;
1556 tree op;
1557 gassign *sum;
1559 /* Create the addition statement. */
1560 op = make_ssa_name (type);
1561 sum = gimple_build_assign (op, opcode, op1, op2);
1563 /* Find an insertion place and insert. */
1564 if (TREE_CODE (op1) == SSA_NAME)
1565 op1def = SSA_NAME_DEF_STMT (op1);
1566 if (TREE_CODE (op2) == SSA_NAME)
1567 op2def = SSA_NAME_DEF_STMT (op2);
1568 if ((!op1def || gimple_nop_p (op1def))
1569 && (!op2def || gimple_nop_p (op2def)))
1571 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1572 if (!gsi_end_p (gsi)
1573 && is_gimple_call (gsi_stmt (gsi))
1574 && (gimple_call_flags (gsi_stmt (gsi)) & ECF_RETURNS_TWICE))
1576 /* Don't add statements before a returns_twice call at the start
1577 of a function. */
1578 split_edge (single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1579 gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1581 if (gsi_end_p (gsi))
1583 gimple_stmt_iterator gsi2
1584 = gsi_last_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
1585 gimple_set_uid (sum,
1586 gsi_end_p (gsi2) ? 1 : gimple_uid (gsi_stmt (gsi2)));
1588 else
1589 gimple_set_uid (sum, gimple_uid (gsi_stmt (gsi)));
1590 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
1592 else
1594 gimple *insert_point;
1595 if ((!op1def || gimple_nop_p (op1def))
1596 || (op2def && !gimple_nop_p (op2def)
1597 && reassoc_stmt_dominates_stmt_p (op1def, op2def)))
1598 insert_point = op2def;
1599 else
1600 insert_point = op1def;
1601 insert_stmt_after (sum, insert_point);
1603 update_stmt (sum);
1605 return sum;
1608 /* Perform un-distribution of divisions and multiplications.
1609 A * X + B * X is transformed into (A + B) * X and A / X + B / X
1610 to (A + B) / X for real X.
1612 The algorithm is organized as follows.
1614 - First we walk the addition chain *OPS looking for summands that
1615 are defined by a multiplication or a real division. This results
1616 in the candidates bitmap with relevant indices into *OPS.
1618 - Second we build the chains of multiplications or divisions for
1619 these candidates, counting the number of occurrences of (operand, code)
1620 pairs in all of the candidates chains.
1622 - Third we sort the (operand, code) pairs by number of occurrence and
1623 process them starting with the pair with the most uses.
1625 * For each such pair we walk the candidates again to build a
1626 second candidate bitmap noting all multiplication/division chains
1627 that have at least one occurrence of (operand, code).
1629 * We build an alternate addition chain only covering these
1630 candidates with one (operand, code) operation removed from their
1631 multiplication/division chain.
1633 * The first candidate gets replaced by the alternate addition chain
1634 multiplied/divided by the operand.
1636 * All candidate chains get disabled for further processing and
1637 processing of (operand, code) pairs continues.
1639 The alternate addition chains built are re-processed by the main
1640 reassociation algorithm which allows optimizing a * x * y + b * y * x
1641 to (a + b) * x * y in one invocation of the reassociation pass. */
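/* For illustration (a sketch; the division case additionally requires
   -fassociative-math, since RDIV_EXPR is real division): at the source
   level the transformation corresponds roughly to

     a * x + b * x + c   ->   (a + b) * x + c
     a / x + b / x       ->   (a + b) / x

   where (x, MULT_EXPR) resp. (x, RDIV_EXPR) is the most frequently
   occurring (operand, code) pair among the candidate chains.  */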
1643 static bool
1644 undistribute_ops_list (enum tree_code opcode,
1645 vec<operand_entry *> *ops, class loop *loop)
1647 unsigned int length = ops->length ();
1648 operand_entry *oe1;
1649 unsigned i, j;
1650 unsigned nr_candidates, nr_candidates2;
1651 sbitmap_iterator sbi0;
1652 vec<operand_entry *> *subops;
1653 bool changed = false;
1654 unsigned int next_oecount_id = 0;
1656 if (length <= 1
1657 || opcode != PLUS_EXPR)
1658 return false;
1660 /* Build a list of candidates to process. */
1661 auto_sbitmap candidates (length);
1662 bitmap_clear (candidates);
1663 nr_candidates = 0;
1664 FOR_EACH_VEC_ELT (*ops, i, oe1)
1666 enum tree_code dcode;
1667 gimple *oe1def;
1669 if (TREE_CODE (oe1->op) != SSA_NAME)
1670 continue;
1671 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1672 if (!is_gimple_assign (oe1def))
1673 continue;
1674 dcode = gimple_assign_rhs_code (oe1def);
1675 if ((dcode != MULT_EXPR
1676 && dcode != RDIV_EXPR)
1677 || !is_reassociable_op (oe1def, dcode, loop))
1678 continue;
1680 bitmap_set_bit (candidates, i);
1681 nr_candidates++;
1684 if (nr_candidates < 2)
1685 return false;
1687 if (dump_file && (dump_flags & TDF_DETAILS))
1689 fprintf (dump_file, "searching for un-distribute opportunities ");
1690 print_generic_expr (dump_file,
1691 (*ops)[bitmap_first_set_bit (candidates)]->op, TDF_NONE);
1692 fprintf (dump_file, " %d\n", nr_candidates);
1695 /* Build linearized sub-operand lists and the counting table. */
1696 cvec.create (0);
1698 hash_table<oecount_hasher> ctable (15);
1700 /* ??? Macro arguments cannot have multi-argument template types in
1701 them. This typedef is needed to work around that limitation. */
1702 typedef vec<operand_entry *> vec_operand_entry_t_heap;
1703 subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ());
1704 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1706 gimple *oedef;
1707 enum tree_code oecode;
1708 unsigned j;
1710 oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op);
1711 oecode = gimple_assign_rhs_code (oedef);
1712 linearize_expr_tree (&subops[i], oedef,
1713 associative_tree_code (oecode), false);
1715 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1717 oecount c;
1718 int *slot;
1719 int idx;
1720 c.oecode = oecode;
1721 c.cnt = 1;
1722 c.id = next_oecount_id++;
1723 c.op = oe1->op;
1724 cvec.safe_push (c);
1725 idx = cvec.length () + 41;
1726 slot = ctable.find_slot (idx, INSERT);
1727 if (!*slot)
1729 *slot = idx;
1731 else
1733 cvec.pop ();
1734 cvec[*slot - 42].cnt++;
1739 /* Sort the counting table. */
1740 cvec.qsort (oecount_cmp);
1742 if (dump_file && (dump_flags & TDF_DETAILS))
1744 oecount *c;
1745 fprintf (dump_file, "Candidates:\n");
1746 FOR_EACH_VEC_ELT (cvec, j, c)
1748 fprintf (dump_file, " %u %s: ", c->cnt,
1749 c->oecode == MULT_EXPR
1750 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
1751 print_generic_expr (dump_file, c->op);
1752 fprintf (dump_file, "\n");
1756 /* Process the (operand, code) pairs in order of most occurrence. */
1757 auto_sbitmap candidates2 (length);
1758 while (!cvec.is_empty ())
1760 oecount *c = &cvec.last ();
1761 if (c->cnt < 2)
1762 break;
1764 /* Now collect the operands in the outer chain that contain
1765 the common operand in their inner chain. */
1766 bitmap_clear (candidates2);
1767 nr_candidates2 = 0;
1768 EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0)
1770 gimple *oedef;
1771 enum tree_code oecode;
1772 unsigned j;
1773 tree op = (*ops)[i]->op;
1775 /* If we undistributed in this chain already this may be
1776 a constant. */
1777 if (TREE_CODE (op) != SSA_NAME)
1778 continue;
1780 oedef = SSA_NAME_DEF_STMT (op);
1781 oecode = gimple_assign_rhs_code (oedef);
1782 if (oecode != c->oecode)
1783 continue;
1785 FOR_EACH_VEC_ELT (subops[i], j, oe1)
1787 if (oe1->op == c->op)
1789 bitmap_set_bit (candidates2, i);
1790 ++nr_candidates2;
1791 break;
1796 if (nr_candidates2 >= 2)
1798 operand_entry *oe1, *oe2;
1799 gimple *prod;
1800 int first = bitmap_first_set_bit (candidates2);
1802 /* Build the new addition chain. */
1803 oe1 = (*ops)[first];
1804 if (dump_file && (dump_flags & TDF_DETAILS))
1806 fprintf (dump_file, "Building (");
1807 print_generic_expr (dump_file, oe1->op);
1809 zero_one_operation (&oe1->op, c->oecode, c->op);
1810 EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0)
1812 gimple *sum;
1813 oe2 = (*ops)[i];
1814 if (dump_file && (dump_flags & TDF_DETAILS))
1816 fprintf (dump_file, " + ");
1817 print_generic_expr (dump_file, oe2->op);
1819 zero_one_operation (&oe2->op, c->oecode, c->op);
1820 sum = build_and_add_sum (TREE_TYPE (oe1->op),
1821 oe1->op, oe2->op, opcode);
1822 oe2->op = build_zero_cst (TREE_TYPE (oe2->op));
1823 oe2->rank = 0;
1824 oe1->op = gimple_get_lhs (sum);
1827 /* Apply the multiplication/division. */
1828 prod = build_and_add_sum (TREE_TYPE (oe1->op),
1829 oe1->op, c->op, c->oecode);
1830 if (dump_file && (dump_flags & TDF_DETAILS))
1832 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
1833 print_generic_expr (dump_file, c->op);
1834 fprintf (dump_file, "\n");
1837 /* Record it in the addition chain and disable further
1838 undistribution with this op. */
1839 oe1->op = gimple_assign_lhs (prod);
1840 oe1->rank = get_rank (oe1->op);
1841 subops[first].release ();
1843 changed = true;
1846 cvec.pop ();
1849 for (i = 0; i < ops->length (); ++i)
1850 subops[i].release ();
1851 free (subops);
1852 cvec.release ();
1854 return changed;
1857 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME:
1858 first: element index for each relevant BIT_FIELD_REF.
1859 second: the index of vec ops* for each relevant BIT_FIELD_REF. */
1860 typedef std::pair<unsigned, unsigned> v_info_elem;
1861 struct v_info {
1862 tree vec_type;
1863 auto_vec<v_info_elem, 32> vec;
1865 typedef v_info *v_info_ptr;
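/* For example (hypothetical SSA names): if (*ops)[5] is defined by
   _1 = BIT_FIELD_REF <v_2, 32, 64> and the vector's element type is 32
   bits wide, the v_info_elem recorded for v_2 is the pair (2, 5):
   element index 64 / 32 == 2, operand index 5.  */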
1867 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1868 static int
1869 sort_by_mach_mode (const void *p_i, const void *p_j)
1871 const tree tr1 = *((const tree *) p_i);
1872 const tree tr2 = *((const tree *) p_j);
1873 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1874 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1875 if (mode1 > mode2)
1876 return 1;
1877 else if (mode1 < mode2)
1878 return -1;
1879 if (SSA_NAME_VERSION (tr1) < SSA_NAME_VERSION (tr2))
1880 return -1;
1881 else if (SSA_NAME_VERSION (tr1) > SSA_NAME_VERSION (tr2))
1882 return 1;
1883 return 0;
1886 /* Cleanup hash map for VECTOR information. */
1887 static void
1888 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1890 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1891 it != info_map.end (); ++it)
1893 v_info_ptr info = (*it).second;
1894 delete info;
1895 (*it).second = NULL;
1899 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1900 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1901 is transformed to
1902 Vs = (V1 + V2 + ... + Vn)
1903 Vs[0] + Vs[1] + ... + Vs[k]
1905 The basic steps are listed below:
1907 1) Check the addition chain *OPS, looking for summands coming from a
1908 BIT_FIELD_REF on a VECTOR type. Put the information into
1909 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key.
1911 2) For each key (VECTOR SSA_NAME), validate that its BIT_FIELD_REFs are
1912 contiguous and cover the whole VECTOR perfectly without any holes.
1913 Obtain one VECTOR list which contains the candidates to be transformed.
1915 3) Sort the VECTOR list by machine mode of the VECTOR type; for each group
1916 of candidates with the same mode, build the addition statements for them
1917 and generate BIT_FIELD_REFs accordingly.
1919 TODO:
1920 The current implementation requires that each whole VECTOR be fully
1921 covered. It could be extended to support partial coverage (adjacent
1922 elements that do not fill the whole vector), but that would need some
1923 cost model to decide where to draw the boundary. */
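/* A sketch of the intended effect, assuming a vector type with four
   32-bit elements and hypothetical SSA names:

     _1 = BIT_FIELD_REF <v1_3(D), 32, 0>;
     _2 = BIT_FIELD_REF <v1_3(D), 32, 32>;
     ... likewise for lanes 2 and 3 of v1 and all four lanes of v2 ...
     sum = _1 + _2 + ... ;

   becomes a single vector addition vs = v1 + v2 followed by four
   BIT_FIELD_REFs on vs feeding the (now shorter) scalar addition
   chain.  */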
1925 static bool
1926 undistribute_bitref_for_vector (enum tree_code opcode,
1927 vec<operand_entry *> *ops, struct loop *loop)
1929 if (ops->length () <= 1)
1930 return false;
1932 if (opcode != PLUS_EXPR
1933 && opcode != MULT_EXPR
1934 && opcode != BIT_XOR_EXPR
1935 && opcode != BIT_IOR_EXPR
1936 && opcode != BIT_AND_EXPR)
1937 return false;
1939 hash_map<tree, v_info_ptr> v_info_map;
1940 operand_entry *oe1;
1941 unsigned i;
1943 /* Find the summands in the addition chain that come from a VECTOR
1944 BIT_FIELD_REF and record their information in the map. */
1945 FOR_EACH_VEC_ELT (*ops, i, oe1)
1947 enum tree_code dcode;
1948 gimple *oe1def;
1950 if (TREE_CODE (oe1->op) != SSA_NAME)
1951 continue;
1952 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1953 if (!is_gimple_assign (oe1def))
1954 continue;
1955 dcode = gimple_assign_rhs_code (oe1def);
1956 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1957 continue;
1959 tree rhs = gimple_assign_rhs1 (oe1def);
1960 tree vec = TREE_OPERAND (rhs, 0);
1961 tree vec_type = TREE_TYPE (vec);
1963 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1964 continue;
1966 /* Ignore it if target machine can't support this VECTOR type. */
1967 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1968 continue;
1970 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1971 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1972 continue;
1974 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1975 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1976 continue;
1978 /* The type of BIT_FIELD_REF might not be equal to the element type of
1979 the vector. We want to use a vector type with element type the
1980 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1981 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1983 machine_mode simd_mode;
1984 unsigned HOST_WIDE_INT size, nunits;
1985 unsigned HOST_WIDE_INT elem_size
1986 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1987 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1988 continue;
1989 if (size <= elem_size || (size % elem_size) != 0)
1990 continue;
1991 nunits = size / elem_size;
1992 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1993 nunits).exists (&simd_mode))
1994 continue;
1995 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1997 /* Ignore it if target machine can't support this VECTOR type. */
1998 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1999 continue;
2001 /* Check const vector type, constrain BIT_FIELD_REF offset and
2002 size. */
2003 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
2004 continue;
2006 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
2007 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
2008 continue;
2011 tree elem_type = TREE_TYPE (vec_type);
2012 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
2013 if (maybe_ne (bit_field_size (rhs), elem_size))
2014 continue;
2016 unsigned idx;
2017 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
2018 continue;
2020 /* Ignore it if target machine can't support this type of VECTOR
2021 operation. */
2022 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
2023 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
2024 continue;
2026 bool existed;
2027 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
2028 if (!existed)
2030 info = new v_info;
2031 info->vec_type = vec_type;
2033 else if (!types_compatible_p (vec_type, info->vec_type))
2034 continue;
2035 info->vec.safe_push (std::make_pair (idx, i));
2038 /* We need at least two VECTORs to combine. */
2039 if (v_info_map.elements () <= 1)
2041 cleanup_vinfo_map (v_info_map);
2042 return false;
2045 /* Verify all VECTOR candidates by checking two conditions:
2046 1) sorted offsets are adjacent, no holes.
2047 2) can fill the whole VECTOR perfectly.
2048 And add the valid candidates to a vector for further handling. */
2049 auto_vec<tree> valid_vecs (v_info_map.elements ());
2050 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
2051 it != v_info_map.end (); ++it)
2053 tree cand_vec = (*it).first;
2054 v_info_ptr cand_info = (*it).second;
2055 unsigned int num_elems
2056 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
2057 if (cand_info->vec.length () != num_elems)
2058 continue;
2059 sbitmap holes = sbitmap_alloc (num_elems);
2060 bitmap_ones (holes);
2061 bool valid = true;
2062 v_info_elem *curr;
2063 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
2065 if (!bitmap_bit_p (holes, curr->first))
2067 valid = false;
2068 break;
2070 else
2071 bitmap_clear_bit (holes, curr->first);
2073 if (valid && bitmap_empty_p (holes))
2074 valid_vecs.quick_push (cand_vec);
2075 sbitmap_free (holes);
2078 /* We need at least two VECTORs to combine. */
2079 if (valid_vecs.length () <= 1)
2081 cleanup_vinfo_map (v_info_map);
2082 return false;
2085 valid_vecs.qsort (sort_by_mach_mode);
2086 /* Go through all candidates in machine mode order and, for each group of
2087 candidates sharing a mode, build one combined sum; single ones are skipped. */
2088 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2090 tree tvec = valid_vecs[i];
2091 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2093 /* Skip modes with only a single candidate. */
2094 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2095 continue;
2097 unsigned int idx, j;
2098 gimple *sum = NULL;
2099 tree sum_vec = tvec;
2100 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2101 v_info_elem *elem;
2102 tree vec_type = info_ptr->vec_type;
2104 /* Build the sum for all candidates with same mode. */
2107 sum = build_and_add_sum (vec_type, sum_vec,
2108 valid_vecs[i + 1], opcode);
2109 /* Update the operands only after build_and_add_sum,
2110 so that we don't have to repeat the placement algorithm
2111 of build_and_add_sum. */
2112 if (sum_vec == tvec
2113 && !useless_type_conversion_p (vec_type, TREE_TYPE (sum_vec)))
2115 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2116 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2117 tree lhs = make_ssa_name (vec_type);
2118 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2119 gimple_set_uid (g, gimple_uid (sum));
2120 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2121 gimple_assign_set_rhs1 (sum, lhs);
2122 update_stmt (sum);
2124 if (!useless_type_conversion_p (vec_type,
2125 TREE_TYPE (valid_vecs[i + 1])))
2127 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2128 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2129 valid_vecs[i + 1]);
2130 tree lhs = make_ssa_name (vec_type);
2131 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2132 gimple_set_uid (g, gimple_uid (sum));
2133 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2134 gimple_assign_set_rhs2 (sum, lhs);
2135 update_stmt (sum);
2137 sum_vec = gimple_get_lhs (sum);
2138 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2139 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2140 /* Update those related ops of current candidate VECTOR. */
2141 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2143 idx = elem->second;
2144 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2145 /* Set this so that the op definition will get DCEd later. */
2146 gimple_set_visited (def, true);
2147 if (opcode == PLUS_EXPR
2148 || opcode == BIT_XOR_EXPR
2149 || opcode == BIT_IOR_EXPR)
2150 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2151 else if (opcode == MULT_EXPR)
2152 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2153 else
2155 gcc_assert (opcode == BIT_AND_EXPR);
2156 (*ops)[idx]->op
2157 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2159 (*ops)[idx]->rank = 0;
2161 if (dump_file && (dump_flags & TDF_DETAILS))
2163 fprintf (dump_file, "Generating addition -> ");
2164 print_gimple_stmt (dump_file, sum, 0);
2166 i++;
2168 while ((i < valid_vecs.length () - 1)
2169 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2171 /* Referring to the first valid VECTOR with this mode, generate the
2172 BIT_FIELD_REF statements accordingly. */
2173 info_ptr = *(v_info_map.get (tvec));
2174 gcc_assert (sum);
2175 tree elem_type = TREE_TYPE (vec_type);
2176 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2178 idx = elem->second;
2179 tree dst = make_ssa_name (elem_type);
2180 tree pos = bitsize_int (elem->first
2181 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2182 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2183 TYPE_SIZE (elem_type), pos);
2184 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2185 insert_stmt_after (gs, sum);
2186 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2187 /* Set this so that the op definition will get DCEd later. */
2188 gimple_set_visited (def, true);
2189 (*ops)[idx]->op = gimple_assign_lhs (gs);
2190 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2191 if (dump_file && (dump_flags & TDF_DETAILS))
2193 fprintf (dump_file, "Generating bit_field_ref -> ");
2194 print_gimple_stmt (dump_file, gs, 0);
2199 if (dump_file && (dump_flags & TDF_DETAILS))
2200 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2202 cleanup_vinfo_map (v_info_map);
2204 return true;
2207 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
2208 expression, examine the other OPS to see if any of them are comparisons
2209 of the same values, which we may be able to combine or eliminate.
2210 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
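/* A minimal source-level sketch (the function name is made up for the
   example):

     _Bool
     less_or_equal (int a, int b)
     {
       return a < b || a == b;
     }

   Here both operands of the BIT_IOR_EXPR compare the same values, so the
   pair is expected to collapse into the single test a <= b.  */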
2212 static bool
2213 eliminate_redundant_comparison (enum tree_code opcode,
2214 vec<operand_entry *> *ops,
2215 unsigned int currindex,
2216 operand_entry *curr)
2218 tree op1, op2;
2219 enum tree_code lcode, rcode;
2220 gimple *def1, *def2;
2221 int i;
2222 operand_entry *oe;
2224 if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
2225 return false;
2227 /* Check that CURR is a comparison. */
2228 if (TREE_CODE (curr->op) != SSA_NAME)
2229 return false;
2230 def1 = SSA_NAME_DEF_STMT (curr->op);
2231 if (!is_gimple_assign (def1))
2232 return false;
2233 lcode = gimple_assign_rhs_code (def1);
2234 if (TREE_CODE_CLASS (lcode) != tcc_comparison)
2235 return false;
2236 op1 = gimple_assign_rhs1 (def1);
2237 op2 = gimple_assign_rhs2 (def1);
2239 /* Now look for a similar comparison in the remaining OPS. */
2240 for (i = currindex + 1; ops->iterate (i, &oe); i++)
2242 tree t;
2244 if (TREE_CODE (oe->op) != SSA_NAME)
2245 continue;
2246 def2 = SSA_NAME_DEF_STMT (oe->op);
2247 if (!is_gimple_assign (def2))
2248 continue;
2249 rcode = gimple_assign_rhs_code (def2);
2250 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
2251 continue;
2253 /* If we got here, we have a match. See if we can combine the
2254 two comparisons. */
2255 tree type = TREE_TYPE (gimple_assign_lhs (def1));
2256 if (opcode == BIT_IOR_EXPR)
2257 t = maybe_fold_or_comparisons (type,
2258 lcode, op1, op2,
2259 rcode, gimple_assign_rhs1 (def2),
2260 gimple_assign_rhs2 (def2));
2261 else
2262 t = maybe_fold_and_comparisons (type,
2263 lcode, op1, op2,
2264 rcode, gimple_assign_rhs1 (def2),
2265 gimple_assign_rhs2 (def2));
2266 if (!t)
2267 continue;
2269 /* maybe_fold_and_comparisons and maybe_fold_or_comparisons
2270 always give us a boolean_type_node value back. If the original
2271 BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type,
2272 we need to convert. */
2273 if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t)))
2275 if (!fold_convertible_p (TREE_TYPE (curr->op), t))
2276 continue;
2277 t = fold_convert (TREE_TYPE (curr->op), t);
2280 if (TREE_CODE (t) != INTEGER_CST
2281 && !operand_equal_p (t, curr->op, 0))
2283 enum tree_code subcode;
2284 tree newop1, newop2;
2285 if (!COMPARISON_CLASS_P (t))
2286 continue;
2287 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2288 STRIP_USELESS_TYPE_CONVERSION (newop1);
2289 STRIP_USELESS_TYPE_CONVERSION (newop2);
2290 if (!is_gimple_val (newop1) || !is_gimple_val (newop2))
2291 continue;
2292 if (lcode == TREE_CODE (t)
2293 && operand_equal_p (op1, newop1, 0)
2294 && operand_equal_p (op2, newop2, 0))
2295 t = curr->op;
2296 else if ((TREE_CODE (newop1) == SSA_NAME
2297 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop1))
2298 || (TREE_CODE (newop2) == SSA_NAME
2299 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (newop2)))
2300 continue;
2303 if (dump_file && (dump_flags & TDF_DETAILS))
2305 fprintf (dump_file, "Equivalence: ");
2306 print_generic_expr (dump_file, curr->op);
2307 fprintf (dump_file, " %s ", op_symbol_code (opcode));
2308 print_generic_expr (dump_file, oe->op);
2309 fprintf (dump_file, " -> ");
2310 print_generic_expr (dump_file, t);
2311 fprintf (dump_file, "\n");
2314 /* Now we can delete oe, as it has been subsumed by the new combined
2315 expression t. */
2316 ops->ordered_remove (i);
2317 reassociate_stats.ops_eliminated++;
2319 /* If t is the same as curr->op, we're done. Otherwise we must
2320 replace curr->op with t. Special case is if we got a constant
2321 back, in which case we add it to the end instead of in place of
2322 the current entry. */
2323 if (TREE_CODE (t) == INTEGER_CST)
2325 ops->ordered_remove (currindex);
2326 add_to_ops_vec (ops, t);
2328 else if (!operand_equal_p (t, curr->op, 0))
2330 gimple *sum;
2331 enum tree_code subcode;
2332 tree newop1;
2333 tree newop2;
2334 gcc_assert (COMPARISON_CLASS_P (t));
2335 extract_ops_from_tree (t, &subcode, &newop1, &newop2);
2336 STRIP_USELESS_TYPE_CONVERSION (newop1);
2337 STRIP_USELESS_TYPE_CONVERSION (newop2);
2338 gcc_checking_assert (is_gimple_val (newop1)
2339 && is_gimple_val (newop2));
2340 sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode);
2341 curr->op = gimple_get_lhs (sum);
2343 return true;
2346 return false;
2350 /* Transform repeated addition of same values into multiply with
2351 constant. */
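/* A minimal sketch of the transformation (the function name is made up
   for the example):

     int
     five_times (int x)
     {
       return x + x + x + x + x;
     }

   The five identical summands are expected to be replaced by a single
   multiplication, roughly x * 5.  */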
2352 static bool
2353 transform_add_to_multiply (vec<operand_entry *> *ops)
2355 operand_entry *oe;
2356 tree op = NULL_TREE;
2357 int j;
2358 int i, start = -1, end = 0, count = 0;
2359 auto_vec<std::pair <int, int> > indxs;
2360 bool changed = false;
2362 if (!INTEGRAL_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2363 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE ((*ops)[0]->op))
2364 || !flag_unsafe_math_optimizations))
2365 return false;
2367 /* Look for repeated operands. */
2368 FOR_EACH_VEC_ELT (*ops, i, oe)
2370 if (start == -1)
2372 count = 1;
2373 op = oe->op;
2374 start = i;
2376 else if (operand_equal_p (oe->op, op, 0))
2378 count++;
2379 end = i;
2381 else
2383 if (count > 1)
2384 indxs.safe_push (std::make_pair (start, end));
2385 count = 1;
2386 op = oe->op;
2387 start = i;
2391 if (count > 1)
2392 indxs.safe_push (std::make_pair (start, end));
2394 for (j = indxs.length () - 1; j >= 0; --j)
2396 /* Convert repeated operand addition to multiplication. */
2397 start = indxs[j].first;
2398 end = indxs[j].second;
2399 op = (*ops)[start]->op;
2400 count = end - start + 1;
2401 for (i = end; i >= start; --i)
2402 ops->unordered_remove (i);
2403 tree tmp = make_ssa_name (TREE_TYPE (op));
2404 tree cst = build_int_cst (integer_type_node, count);
2405 gassign *mul_stmt
2406 = gimple_build_assign (tmp, MULT_EXPR,
2407 op, fold_convert (TREE_TYPE (op), cst));
2408 gimple_set_visited (mul_stmt, true);
2409 add_to_ops_vec (ops, tmp, mul_stmt);
2410 changed = true;
2413 return changed;
2417 /* Perform various identities and other optimizations on the list of
2418 operand entries, stored in OPS. The tree code for the binary
2419 operation between all the operands is OPCODE. */
2421 static void
2422 optimize_ops_list (enum tree_code opcode,
2423 vec<operand_entry *> *ops)
2425 unsigned int length = ops->length ();
2426 unsigned int i;
2427 operand_entry *oe;
2428 operand_entry *oelast = NULL;
2429 bool iterate = false;
2431 if (length == 1)
2432 return;
2434 oelast = ops->last ();
2436 /* If the last two are constants, pop the constants off, merge them
2437 and try the next two. */
2438 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
2440 operand_entry *oelm1 = (*ops)[length - 2];
2442 if (oelm1->rank == 0
2443 && is_gimple_min_invariant (oelm1->op)
2444 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
2445 TREE_TYPE (oelast->op)))
2447 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
2448 oelm1->op, oelast->op);
2450 if (folded && is_gimple_min_invariant (folded))
2452 if (dump_file && (dump_flags & TDF_DETAILS))
2453 fprintf (dump_file, "Merging constants\n");
2455 ops->pop ();
2456 ops->pop ();
2458 add_to_ops_vec (ops, folded);
2459 reassociate_stats.constants_eliminated++;
2461 optimize_ops_list (opcode, ops);
2462 return;
2467 eliminate_using_constants (opcode, ops);
2468 oelast = NULL;
2470 for (i = 0; ops->iterate (i, &oe);)
2472 bool done = false;
2474 if (eliminate_not_pairs (opcode, ops, i, oe))
2475 return;
2476 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
2477 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe))
2478 || (!done && eliminate_redundant_comparison (opcode, ops, i, oe)))
2480 if (done)
2481 return;
2482 iterate = true;
2483 oelast = NULL;
2484 continue;
2486 oelast = oe;
2487 i++;
2490 if (iterate)
2491 optimize_ops_list (opcode, ops);
2494 /* The following functions are subroutines to optimize_range_tests and allow
2495 it to try to change a logical combination of comparisons into a range
2496 test.
2498 For example, both
2499 X == 2 || X == 5 || X == 3 || X == 4
2500 and
2501 X >= 2 && X <= 5
2502 are converted to
2503 (unsigned) (X - 2) <= 3
2505 For more information see comments above fold_test_range in fold-const.cc,
2506 this implementation is for GIMPLE. */
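/* A minimal source-level sketch (the function name is made up for the
   example):

     _Bool
     in_small_set (unsigned x)
     {
       return x == 2 || x == 5 || x == 3 || x == 4;
     }

   is expected to end up testing the single range above, roughly
   (x - 2) <= 3 on the unsigned type, instead of four comparisons.  */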
2510 /* Dump the range entry R to FILE, skipping its expression if SKIP_EXP. */
2512 void
2513 dump_range_entry (FILE *file, struct range_entry *r, bool skip_exp)
2515 if (!skip_exp)
2516 print_generic_expr (file, r->exp);
2517 fprintf (file, " %c[", r->in_p ? '+' : '-');
2518 print_generic_expr (file, r->low);
2519 fputs (", ", file);
2520 print_generic_expr (file, r->high);
2521 fputc (']', file);
2524 /* Dump the range entry R to STDERR. */
2526 DEBUG_FUNCTION void
2527 debug_range_entry (struct range_entry *r)
2529 dump_range_entry (stderr, r, false);
2530 fputc ('\n', stderr);
2533 /* This is similar to make_range in fold-const.cc, but on top of
2534 GIMPLE instead of trees. If EXP is non-NULL, it should be
2535 an SSA_NAME and STMT argument is ignored, otherwise STMT
2536 argument should be a GIMPLE_COND. */
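/* For illustration (the SSA names are made up): starting from EXP
   defined as _1 = x_5 > 7, the loop below refines the initial
   "EXP != 0" range step by step via make_range_step, ending up with
   r->exp = x_5 and roughly the range +[8, -], i.e. x_5 >= 8.  */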
2538 void
2539 init_range_entry (struct range_entry *r, tree exp, gimple *stmt)
2541 int in_p;
2542 tree low, high;
2543 bool is_bool, strict_overflow_p;
2545 r->exp = NULL_TREE;
2546 r->in_p = false;
2547 r->strict_overflow_p = false;
2548 r->low = NULL_TREE;
2549 r->high = NULL_TREE;
2550 if (exp != NULL_TREE
2551 && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp))))
2552 return;
2554 /* Start with simply saying "EXP != 0" and then look at the code of EXP
2555 and see if we can refine the range. Some of the cases below may not
2556 happen, but it doesn't seem worth worrying about this. We "continue"
2557 the outer loop when we've changed something; otherwise we "break"
2558 the switch, which will "break" the while. */
2559 low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node;
2560 high = low;
2561 in_p = 0;
2562 strict_overflow_p = false;
2563 is_bool = false;
2564 if (exp == NULL_TREE)
2565 is_bool = true;
2566 else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1)
2568 if (TYPE_UNSIGNED (TREE_TYPE (exp)))
2569 is_bool = true;
2570 else
2571 return;
2573 else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE)
2574 is_bool = true;
2576 while (1)
2578 enum tree_code code;
2579 tree arg0, arg1, exp_type;
2580 tree nexp;
2581 location_t loc;
2583 if (exp != NULL_TREE)
2585 if (TREE_CODE (exp) != SSA_NAME
2586 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
2587 break;
2589 stmt = SSA_NAME_DEF_STMT (exp);
2590 if (!is_gimple_assign (stmt))
2591 break;
2593 code = gimple_assign_rhs_code (stmt);
2594 arg0 = gimple_assign_rhs1 (stmt);
2595 arg1 = gimple_assign_rhs2 (stmt);
2596 exp_type = TREE_TYPE (exp);
2598 else
2600 code = gimple_cond_code (stmt);
2601 arg0 = gimple_cond_lhs (stmt);
2602 arg1 = gimple_cond_rhs (stmt);
2603 exp_type = boolean_type_node;
2606 if (TREE_CODE (arg0) != SSA_NAME
2607 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)
2608 || ssa_name_maybe_undef_p (arg0))
2609 break;
2610 loc = gimple_location (stmt);
2611 switch (code)
2613 case BIT_NOT_EXPR:
2614 if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE
2615 /* Ensure the range is either +[-,0], +[0,0],
2616 -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or
2617 -[1,1]. If it is e.g. +[-,-] or -[-,-]
2618 or similar expression of unconditional true or
2619 false, it should not be negated. */
2620 && ((high && integer_zerop (high))
2621 || (low && integer_onep (low))))
2623 in_p = !in_p;
2624 exp = arg0;
2625 continue;
2627 break;
2628 case SSA_NAME:
2629 exp = arg0;
2630 continue;
2631 CASE_CONVERT:
2632 if (is_bool)
2634 if ((TYPE_PRECISION (exp_type) == 1
2635 || TREE_CODE (exp_type) == BOOLEAN_TYPE)
2636 && TYPE_PRECISION (TREE_TYPE (arg0)) > 1)
2637 return;
2639 else if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1)
2641 if (TYPE_UNSIGNED (TREE_TYPE (arg0)))
2642 is_bool = true;
2643 else
2644 return;
2646 else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE)
2647 is_bool = true;
2648 goto do_default;
2649 case EQ_EXPR:
2650 case NE_EXPR:
2651 case LT_EXPR:
2652 case LE_EXPR:
2653 case GE_EXPR:
2654 case GT_EXPR:
2655 is_bool = true;
2656 /* FALLTHRU */
2657 default:
2658 if (!is_bool)
2659 return;
2660 do_default:
2661 nexp = make_range_step (loc, code, arg0, arg1, exp_type,
2662 &low, &high, &in_p,
2663 &strict_overflow_p);
2664 if (nexp != NULL_TREE)
2666 exp = nexp;
2667 gcc_assert (TREE_CODE (exp) == SSA_NAME);
2668 continue;
2670 break;
2672 break;
2674 if (is_bool)
2676 r->exp = exp;
2677 r->in_p = in_p;
2678 r->low = low;
2679 r->high = high;
2680 r->strict_overflow_p = strict_overflow_p;
2684 /* Comparison function for qsort. Sort entries
2685 without SSA_NAME exp first, then with SSA_NAMEs sorted
2686 by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs
2687 by increasing ->low and if ->low is the same, by increasing
2688 ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE
2689 maximum. */
2691 static int
2692 range_entry_cmp (const void *a, const void *b)
2694 const struct range_entry *p = (const struct range_entry *) a;
2695 const struct range_entry *q = (const struct range_entry *) b;
2697 if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME)
2699 if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2701 /* Group range_entries for the same SSA_NAME together. */
2702 if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp))
2703 return -1;
2704 else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp))
2705 return 1;
2706 /* If ->low is different, NULL low goes first, then by
2707 ascending low. */
2708 if (p->low != NULL_TREE)
2710 if (q->low != NULL_TREE)
2712 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2713 p->low, q->low);
2714 if (tem && integer_onep (tem))
2715 return -1;
2716 tem = fold_binary (GT_EXPR, boolean_type_node,
2717 p->low, q->low);
2718 if (tem && integer_onep (tem))
2719 return 1;
2721 else
2722 return 1;
2724 else if (q->low != NULL_TREE)
2725 return -1;
2726 /* If ->high is different, NULL high goes last, before that by
2727 ascending high. */
2728 if (p->high != NULL_TREE)
2730 if (q->high != NULL_TREE)
2732 tree tem = fold_binary (LT_EXPR, boolean_type_node,
2733 p->high, q->high);
2734 if (tem && integer_onep (tem))
2735 return -1;
2736 tem = fold_binary (GT_EXPR, boolean_type_node,
2737 p->high, q->high);
2738 if (tem && integer_onep (tem))
2739 return 1;
2741 else
2742 return -1;
2744 else if (q->high != NULL_TREE)
2745 return 1;
2746 /* If both ranges are the same, sort below by ascending idx. */
2748 else
2749 return 1;
2751 else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME)
2752 return -1;
2754 if (p->idx < q->idx)
2755 return -1;
2756 else
2758 gcc_checking_assert (p->idx > q->idx);
2759 return 1;
2763 /* Helper function for update_range_test. Force EXPR into an SSA_NAME,
2764 insert needed statements BEFORE or after GSI. */
2766 static tree
2767 force_into_ssa_name (gimple_stmt_iterator *gsi, tree expr, bool before)
2769 enum gsi_iterator_update m = before ? GSI_SAME_STMT : GSI_CONTINUE_LINKING;
2770 tree ret = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE, before, m);
2771 if (TREE_CODE (ret) != SSA_NAME)
2773 gimple *g = gimple_build_assign (make_ssa_name (TREE_TYPE (ret)), ret);
2774 if (before)
2775 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2776 else
2777 gsi_insert_after (gsi, g, GSI_CONTINUE_LINKING);
2778 ret = gimple_assign_lhs (g);
2780 return ret;
2783 /* Helper routine of optimize_range_test.
2784 [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for
2785 RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges,
2786 OPCODE and OPS are arguments of optimize_range_tests. If OTHERRANGE
2787 is NULL, OTHERRANGEP should not be and then OTHERRANGEP points to
2788 an array of COUNT pointers to other ranges. Return
2789 true if the range merge has been successful.
2790 If OPCODE is ERROR_MARK, this is called from within
2791 maybe_optimize_range_tests and is performing inter-bb range optimization.
2792 In that case, whether an op is BIT_AND_EXPR or BIT_IOR_EXPR is found in
2793 oe->rank. */
2795 static bool
2796 update_range_test (struct range_entry *range, struct range_entry *otherrange,
2797 struct range_entry **otherrangep,
2798 unsigned int count, enum tree_code opcode,
2799 vec<operand_entry *> *ops, tree exp, gimple_seq seq,
2800 bool in_p, tree low, tree high, bool strict_overflow_p)
2802 unsigned int idx = range->idx;
2803 struct range_entry *swap_with = NULL;
2804 basic_block rewrite_bb_first = NULL, rewrite_bb_last = NULL;
2805 if (opcode == ERROR_MARK)
2807 /* For inter-bb range test optimization, pick from the range tests
2808 the one which is tested in the earliest condition (one dominating
2809 the others), because otherwise there could be some UB (e.g. signed
2810 overflow) in following bbs that we'd expose which wasn't there in
2811 the original program. See PR104196. */
2812 basic_block orig_range_bb = BASIC_BLOCK_FOR_FN (cfun, (*ops)[idx]->id);
2813 basic_block range_bb = orig_range_bb;
2814 for (unsigned int i = 0; i < count; i++)
2816 struct range_entry *this_range;
2817 if (otherrange)
2818 this_range = otherrange + i;
2819 else
2820 this_range = otherrangep[i];
2821 operand_entry *oe = (*ops)[this_range->idx];
2822 basic_block this_bb = BASIC_BLOCK_FOR_FN (cfun, oe->id);
2823 if (range_bb != this_bb
2824 && dominated_by_p (CDI_DOMINATORS, range_bb, this_bb))
2826 swap_with = this_range;
2827 range_bb = this_bb;
2828 idx = this_range->idx;
2831 /* If seq is non-NULL, it can contain statements that use SSA_NAMEs
2832 only defined in later blocks. In this case we can't move the
2833 merged comparison earlier, so instead check if there are any stmts
2834 that might trigger signed integer overflow in between and rewrite
2835 them. But only after we check if the optimization is possible. */
2836 if (seq && swap_with)
2838 rewrite_bb_first = range_bb;
2839 rewrite_bb_last = orig_range_bb;
2840 idx = range->idx;
2841 swap_with = NULL;
2844 operand_entry *oe = (*ops)[idx];
2845 tree op = oe->op;
2846 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
2847 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
2848 location_t loc = gimple_location (stmt);
2849 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
2850 tree tem = build_range_check (loc, optype, unshare_expr (exp),
2851 in_p, low, high);
2852 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
2853 gimple_stmt_iterator gsi;
2854 unsigned int i, uid;
2856 if (tem == NULL_TREE)
2857 return false;
2859 /* If op is default def SSA_NAME, there is no place to insert the
2860 new comparison. Give up, unless we can use OP itself as the
2861 range test. */
2862 if (op && SSA_NAME_IS_DEFAULT_DEF (op))
2864 if (op == range->exp
2865 && ((TYPE_PRECISION (optype) == 1 && TYPE_UNSIGNED (optype))
2866 || TREE_CODE (optype) == BOOLEAN_TYPE)
2867 && (op == tem
2868 || (TREE_CODE (tem) == EQ_EXPR
2869 && TREE_OPERAND (tem, 0) == op
2870 && integer_onep (TREE_OPERAND (tem, 1))))
2871 && opcode != BIT_IOR_EXPR
2872 && (opcode != ERROR_MARK || oe->rank != BIT_IOR_EXPR))
2874 stmt = NULL;
2875 tem = op;
2877 else
2878 return false;
2881 if (swap_with)
2882 std::swap (range->idx, swap_with->idx);
2884 if (strict_overflow_p && issue_strict_overflow_warning (wc))
2885 warning_at (loc, OPT_Wstrict_overflow,
2886 "assuming signed overflow does not occur "
2887 "when simplifying range test");
2889 if (dump_file && (dump_flags & TDF_DETAILS))
2891 struct range_entry *r;
2892 fprintf (dump_file, "Optimizing range tests ");
2893 dump_range_entry (dump_file, range, false);
2894 for (i = 0; i < count; i++)
2896 if (otherrange)
2897 r = otherrange + i;
2898 else
2899 r = otherrangep[i];
2900 if (r->exp
2901 && r->exp != range->exp
2902 && TREE_CODE (r->exp) == SSA_NAME)
2904 fprintf (dump_file, " and ");
2905 dump_range_entry (dump_file, r, false);
2907 else
2909 fprintf (dump_file, " and");
2910 dump_range_entry (dump_file, r, true);
2913 fprintf (dump_file, "\n into ");
2914 print_generic_expr (dump_file, tem);
2915 fprintf (dump_file, "\n");
2918 /* In inter-bb range optimization mode, if we have a seq, we can't
2919 move the merged comparison to the earliest bb from the comparisons
2920 being replaced, so instead rewrite stmts that could trigger signed
2921 integer overflow. */
2922 for (basic_block bb = rewrite_bb_last;
2923 bb != rewrite_bb_first; bb = single_pred (bb))
2924 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
2925 !gsi_end_p (gsi); gsi_next (&gsi))
2927 gimple *stmt = gsi_stmt (gsi);
2928 if (is_gimple_assign (stmt))
2929 if (tree lhs = gimple_assign_lhs (stmt))
2930 if ((INTEGRAL_TYPE_P (TREE_TYPE (lhs))
2931 || POINTER_TYPE_P (TREE_TYPE (lhs)))
2932 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
2934 enum tree_code code = gimple_assign_rhs_code (stmt);
2935 if (arith_code_with_undefined_signed_overflow (code))
2937 gimple_stmt_iterator gsip = gsi;
2938 gimple_stmt_iterator gsin = gsi;
2939 gsi_prev (&gsip);
2940 gsi_next (&gsin);
2941 rewrite_to_defined_overflow (&gsi);
2942 unsigned uid = gimple_uid (stmt);
2943 if (gsi_end_p (gsip))
2944 gsip = gsi_after_labels (bb);
2945 else
2946 gsi_next (&gsip);
2947 for (; gsi_stmt (gsip) != gsi_stmt (gsin);
2948 gsi_next (&gsip))
2949 gimple_set_uid (gsi_stmt (gsip), uid);
2954 if (opcode == BIT_IOR_EXPR
2955 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
2956 tem = invert_truthvalue_loc (loc, tem);
2958 tem = fold_convert_loc (loc, optype, tem);
2959 if (stmt)
2961 gsi = gsi_for_stmt (stmt);
2962 uid = gimple_uid (stmt);
2964 else
2966 gsi = gsi_none ();
2967 uid = 0;
2969 if (stmt == NULL)
2970 gcc_checking_assert (tem == op);
2971 /* In rare cases range->exp can be equal to lhs of stmt.
2972 In that case we have to insert after the stmt rather than before
2973 it. If stmt is a PHI, insert it at the start of the basic block. */
2974 else if (op != range->exp)
2976 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
2977 tem = force_into_ssa_name (&gsi, tem, true);
2978 gsi_prev (&gsi);
2980 else if (gimple_code (stmt) != GIMPLE_PHI)
2982 gsi_insert_seq_after (&gsi, seq, GSI_CONTINUE_LINKING);
2983 tem = force_into_ssa_name (&gsi, tem, false);
2985 else
2987 gsi = gsi_after_labels (gimple_bb (stmt));
2988 if (!gsi_end_p (gsi))
2989 uid = gimple_uid (gsi_stmt (gsi));
2990 else
2992 gsi = gsi_start_bb (gimple_bb (stmt));
2993 uid = 1;
2994 while (!gsi_end_p (gsi))
2996 uid = gimple_uid (gsi_stmt (gsi));
2997 gsi_next (&gsi);
3000 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
3001 tem = force_into_ssa_name (&gsi, tem, true);
3002 if (gsi_end_p (gsi))
3003 gsi = gsi_last_bb (gimple_bb (stmt));
3004 else
3005 gsi_prev (&gsi);
3007 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3008 if (gimple_uid (gsi_stmt (gsi)))
3009 break;
3010 else
3011 gimple_set_uid (gsi_stmt (gsi), uid);
3013 oe->op = tem;
3014 range->exp = exp;
3015 range->low = low;
3016 range->high = high;
3017 range->in_p = in_p;
3018 range->strict_overflow_p = false;
3020 for (i = 0; i < count; i++)
3022 if (otherrange)
3023 range = otherrange + i;
3024 else
3025 range = otherrangep[i];
3026 oe = (*ops)[range->idx];
3027 /* Now change all the other range test immediate uses, so that
3028 those tests will be optimized away. */
3029 if (opcode == ERROR_MARK)
3031 if (oe->op)
3032 oe->op = build_int_cst (TREE_TYPE (oe->op),
3033 oe->rank == BIT_IOR_EXPR ? 0 : 1);
3034 else
3035 oe->op = (oe->rank == BIT_IOR_EXPR
3036 ? boolean_false_node : boolean_true_node);
3038 else
3039 oe->op = error_mark_node;
3040 range->exp = NULL_TREE;
3041 range->low = NULL_TREE;
3042 range->high = NULL_TREE;
3044 return true;
3047 /* Optimize X == CST1 || X == CST2
3048 if popcount (CST1 ^ CST2) == 1 into
3049 (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)).
3050 Similarly for ranges. E.g.
3051 X != 2 && X != 3 && X != 10 && X != 11
3052 will be transformed by the previous optimization into
3053 !((X - 2U) <= 1U || (X - 10U) <= 1U)
3054 and this loop can transform that into
3055 !(((X & ~8) - 2U) <= 1U). */
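/* A small worked instance of the check above, using the values from the
   comment: for the ranges [2, 3] and [10, 11] we have
   lowi ^ lowj == 2 ^ 10 == 8 and highi ^ highj == 3 ^ 11 == 8, a single
   set bit, so bit 3 can be masked away and both ranges collapse into
   (X & ~8) being tested against [2, 3].  */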
3057 static bool
3058 optimize_range_tests_xor (enum tree_code opcode, tree type,
3059 tree lowi, tree lowj, tree highi, tree highj,
3060 vec<operand_entry *> *ops,
3061 struct range_entry *rangei,
3062 struct range_entry *rangej)
3064 tree lowxor, highxor, tem, exp;
3065 /* Check lowi ^ lowj == highi ^ highj and
3066 popcount (lowi ^ lowj) == 1. */
3067 lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj);
3068 if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST)
3069 return false;
3070 if (!integer_pow2p (lowxor))
3071 return false;
3072 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
3073 if (!tree_int_cst_equal (lowxor, highxor))
3074 return false;
3076 exp = rangei->exp;
3077 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3078 int prec = GET_MODE_PRECISION (mode);
3079 if (TYPE_PRECISION (type) < prec
3080 || (wi::to_wide (TYPE_MIN_VALUE (type))
3081 != wi::min_value (prec, TYPE_SIGN (type)))
3082 || (wi::to_wide (TYPE_MAX_VALUE (type))
3083 != wi::max_value (prec, TYPE_SIGN (type))))
3085 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
3086 exp = fold_convert (type, exp);
3087 lowxor = fold_convert (type, lowxor);
3088 lowi = fold_convert (type, lowi);
3089 highi = fold_convert (type, highi);
3091 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
3092 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
3093 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
3094 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
3095 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
3096 NULL, rangei->in_p, lowj, highj,
3097 rangei->strict_overflow_p
3098 || rangej->strict_overflow_p))
3099 return true;
3100 return false;
3103 /* Optimize X == CST1 || X == CST2
3104 if popcount (CST2 - CST1) == 1 into
3105 ((X - CST1) & ~(CST2 - CST1)) == 0.
3106 Similarly for ranges. E.g.
3107 X == 43 || X == 76 || X == 44 || X == 78 || X == 77 || X == 46
3108 || X == 75 || X == 45
3109 will be transformed by the previous optimization into
3110 (X - 43U) <= 3U || (X - 75U) <= 3U
3111 and this loop can transform that into
3112 ((X - 43U) & ~(75U - 43U)) <= 3U. */
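/* A small worked instance using the values from the comment: for the
   ranges [43, 46] and [75, 78] we have highi - lowi == highj - lowj == 3
   and lowj - lowi == 75 - 43 == 32, a power of two, so the two ranges
   merge into ((X - 43U) & ~32U) <= 3U.  */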
3113 static bool
3114 optimize_range_tests_diff (enum tree_code opcode, tree type,
3115 tree lowi, tree lowj, tree highi, tree highj,
3116 vec<operand_entry *> *ops,
3117 struct range_entry *rangei,
3118 struct range_entry *rangej)
3120 tree tem1, tem2, mask;
3121 /* Check highi - lowi == highj - lowj. */
3122 tem1 = fold_binary (MINUS_EXPR, type, highi, lowi);
3123 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3124 return false;
3125 tem2 = fold_binary (MINUS_EXPR, type, highj, lowj);
3126 if (!tree_int_cst_equal (tem1, tem2))
3127 return false;
3128 /* Check popcount (lowj - lowi) == 1. */
3129 tem1 = fold_binary (MINUS_EXPR, type, lowj, lowi);
3130 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
3131 return false;
3132 if (!integer_pow2p (tem1))
3133 return false;
3135 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
3136 int prec = GET_MODE_PRECISION (mode);
3137 if (TYPE_PRECISION (type) < prec
3138 || (wi::to_wide (TYPE_MIN_VALUE (type))
3139 != wi::min_value (prec, TYPE_SIGN (type)))
3140 || (wi::to_wide (TYPE_MAX_VALUE (type))
3141 != wi::max_value (prec, TYPE_SIGN (type))))
3142 type = build_nonstandard_integer_type (prec, 1);
3143 else
3144 type = unsigned_type_for (type);
3145 tem1 = fold_convert (type, tem1);
3146 tem2 = fold_convert (type, tem2);
3147 lowi = fold_convert (type, lowi);
3148 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
3149 tem1 = fold_build2 (MINUS_EXPR, type,
3150 fold_convert (type, rangei->exp), lowi);
3151 tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
3152 lowj = build_int_cst (type, 0);
3153 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, tem1,
3154 NULL, rangei->in_p, lowj, tem2,
3155 rangei->strict_overflow_p
3156 || rangej->strict_overflow_p))
3157 return true;
3158 return false;
3161 /* It does some common checks for function optimize_range_tests_xor and
3162 optimize_range_tests_diff.
3163 If OPTIMIZE_XOR is TRUE, it calls optimize_range_tests_xor.
3164 Else it calls optimize_range_tests_diff. */
3166 static bool
3167 optimize_range_tests_1 (enum tree_code opcode, int first, int length,
3168 bool optimize_xor, vec<operand_entry *> *ops,
3169 struct range_entry *ranges)
3171 int i, j;
3172 bool any_changes = false;
3173 for (i = first; i < length; i++)
3175 tree lowi, highi, lowj, highj, type, tem;
3177 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3178 continue;
3179 type = TREE_TYPE (ranges[i].exp);
3180 if (!INTEGRAL_TYPE_P (type))
3181 continue;
3182 lowi = ranges[i].low;
3183 if (lowi == NULL_TREE)
3184 lowi = TYPE_MIN_VALUE (type);
3185 highi = ranges[i].high;
3186 if (highi == NULL_TREE)
3187 continue;
3188 for (j = i + 1; j < length && j < i + 64; j++)
3190 bool changes;
3191 if (ranges[i].exp != ranges[j].exp || ranges[j].in_p)
3192 continue;
3193 lowj = ranges[j].low;
3194 if (lowj == NULL_TREE)
3195 continue;
3196 highj = ranges[j].high;
3197 if (highj == NULL_TREE)
3198 highj = TYPE_MAX_VALUE (type);
3199 /* Check lowj > highi. */
3200 tem = fold_binary (GT_EXPR, boolean_type_node,
3201 lowj, highi);
3202 if (tem == NULL_TREE || !integer_onep (tem))
3203 continue;
3204 if (optimize_xor)
3205 changes = optimize_range_tests_xor (opcode, type, lowi, lowj,
3206 highi, highj, ops,
3207 ranges + i, ranges + j);
3208 else
3209 changes = optimize_range_tests_diff (opcode, type, lowi, lowj,
3210 highi, highj, ops,
3211 ranges + i, ranges + j);
3212 if (changes)
3214 any_changes = true;
3215 break;
3219 return any_changes;
3222 /* Helper function of optimize_range_tests_to_bit_test. Handle a single
3223 range, EXP, LOW, HIGH, compute bit mask of bits to test and return
3224 EXP on success, NULL otherwise. */
3226 static tree
3227 extract_bit_test_mask (tree exp, int prec, tree totallow, tree low, tree high,
3228 wide_int *mask, tree *totallowp)
3230 tree tem = int_const_binop (MINUS_EXPR, high, low);
3231 if (tem == NULL_TREE
3232 || TREE_CODE (tem) != INTEGER_CST
3233 || TREE_OVERFLOW (tem)
3234 || tree_int_cst_sgn (tem) == -1
3235 || compare_tree_int (tem, prec) != -1)
3236 return NULL_TREE;
3238 unsigned HOST_WIDE_INT max = tree_to_uhwi (tem) + 1;
3239 *mask = wi::shifted_mask (0, max, false, prec);
3240 if (TREE_CODE (exp) == BIT_AND_EXPR
3241 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3243 widest_int msk = wi::to_widest (TREE_OPERAND (exp, 1));
3244 msk = wi::zext (~msk, TYPE_PRECISION (TREE_TYPE (exp)));
3245 if (wi::popcount (msk) == 1
3246 && wi::ltu_p (msk, prec - max))
3248 *mask |= wi::shifted_mask (msk.to_uhwi (), max, false, prec);
3249 max += msk.to_uhwi ();
3250 exp = TREE_OPERAND (exp, 0);
3251 if (integer_zerop (low)
3252 && TREE_CODE (exp) == PLUS_EXPR
3253 && TREE_CODE (TREE_OPERAND (exp, 1)) == INTEGER_CST)
3255 tree ret = TREE_OPERAND (exp, 0);
3256 STRIP_NOPS (ret);
3257 widest_int bias
3258 = wi::neg (wi::sext (wi::to_widest (TREE_OPERAND (exp, 1)),
3259 TYPE_PRECISION (TREE_TYPE (low))));
3260 tree tbias = wide_int_to_tree (TREE_TYPE (ret), bias);
3261 if (totallowp)
3263 *totallowp = tbias;
3264 return ret;
3266 else if (!tree_int_cst_lt (totallow, tbias))
3267 return NULL_TREE;
3268 bias = wi::to_widest (tbias);
3269 bias -= wi::to_widest (totallow);
3270 if (bias >= 0 && bias < prec - max)
3272 *mask = wi::lshift (*mask, bias);
3273 return ret;
3278 if (totallowp)
3279 return exp;
3280 if (!tree_int_cst_lt (totallow, low))
3281 return exp;
3282 tem = int_const_binop (MINUS_EXPR, low, totallow);
3283 if (tem == NULL_TREE
3284 || TREE_CODE (tem) != INTEGER_CST
3285 || TREE_OVERFLOW (tem)
3286 || compare_tree_int (tem, prec - max) == 1)
3287 return NULL_TREE;
3289 *mask = wi::lshift (*mask, wi::to_widest (tem));
3290 return exp;
3293 /* Attempt to optimize small range tests using bit test.
3294 E.g.
3295 X != 43 && X != 76 && X != 44 && X != 78 && X != 49
3296 && X != 77 && X != 46 && X != 75 && X != 45 && X != 82
3297 has been by earlier optimizations optimized into:
3298 ((X - 43U) & ~32U) > 3U && X != 49 && X != 82
3299 As the whole range 43 through 82 spans fewer than 64 values,
3300 for 64-bit word targets optimize that into:
3301 (X - 43U) > 40U && ((1 << (X - 43U)) & 0x8F0000004FULL) == 0 */
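/* For clarity, the constant in the example above can be derived as
   follows: relative to 43 the tested values are 0, 1, 2, 3 and 6
   (i.e. 43..46 and 49) plus 32, 33, 34, 35 and 39 (i.e. 75..78 and 82).
   Setting those bit positions gives 0x4F in the low 32 bits and 0x8F in
   bits 32..39, i.e. the 0x8F0000004FULL constant quoted above.  */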
3303 static bool
3304 optimize_range_tests_to_bit_test (enum tree_code opcode, int first, int length,
3305 vec<operand_entry *> *ops,
3306 struct range_entry *ranges)
3308 int i, j;
3309 bool any_changes = false;
3310 int prec = GET_MODE_BITSIZE (word_mode);
3311 auto_vec<struct range_entry *, 64> candidates;
3313 for (i = first; i < length - 1; i++)
3315 tree lowi, highi, lowj, highj, type;
3317 if (ranges[i].exp == NULL_TREE || ranges[i].in_p)
3318 continue;
3319 type = TREE_TYPE (ranges[i].exp);
3320 if (!INTEGRAL_TYPE_P (type))
3321 continue;
3322 lowi = ranges[i].low;
3323 if (lowi == NULL_TREE)
3324 lowi = TYPE_MIN_VALUE (type);
3325 highi = ranges[i].high;
3326 if (highi == NULL_TREE)
3327 continue;
3328 wide_int mask;
3329 tree exp = extract_bit_test_mask (ranges[i].exp, prec, lowi, lowi,
3330 highi, &mask, &lowi);
3331 if (exp == NULL_TREE)
3332 continue;
3333 bool strict_overflow_p = ranges[i].strict_overflow_p;
3334 candidates.truncate (0);
3335 int end = MIN (i + 64, length);
3336 for (j = i + 1; j < end; j++)
3338 tree exp2;
3339 if (ranges[j].exp == NULL_TREE || ranges[j].in_p)
3340 continue;
3341 if (ranges[j].exp == exp)
3343 else if (TREE_CODE (ranges[j].exp) == BIT_AND_EXPR)
3345 exp2 = TREE_OPERAND (ranges[j].exp, 0);
3346 if (exp2 == exp)
3348 else if (TREE_CODE (exp2) == PLUS_EXPR)
3350 exp2 = TREE_OPERAND (exp2, 0);
3351 STRIP_NOPS (exp2);
3352 if (exp2 != exp)
3353 continue;
3355 else
3356 continue;
3358 else
3359 continue;
3360 lowj = ranges[j].low;
3361 if (lowj == NULL_TREE)
3362 continue;
3363 highj = ranges[j].high;
3364 if (highj == NULL_TREE)
3365 highj = TYPE_MAX_VALUE (type);
3366 wide_int mask2;
3367 exp2 = extract_bit_test_mask (ranges[j].exp, prec, lowi, lowj,
3368 highj, &mask2, NULL);
3369 if (exp2 != exp)
3370 continue;
3371 mask |= mask2;
3372 strict_overflow_p |= ranges[j].strict_overflow_p;
3373 candidates.safe_push (&ranges[j]);
3376 /* If every possible relative value of the expression is a valid shift
3377 amount, then we can merge the entry test into the bit test. In this
3378 case, if we would otherwise need 2 or more comparisons, then use
3379 the bit test; in the other cases, the threshold is 3 comparisons. */
3380 bool entry_test_needed;
3381 int_range_max r;
3382 if (TREE_CODE (exp) == SSA_NAME
3383 && get_range_query (cfun)->range_of_expr (r, exp)
3384 && !r.undefined_p ()
3385 && !r.varying_p ()
3386 && wi::leu_p (r.upper_bound () - r.lower_bound (), prec - 1))
3388 wide_int min = r.lower_bound ();
3389 wide_int ilowi = wi::to_wide (lowi);
3390 if (wi::lt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3392 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3393 mask = wi::lshift (mask, ilowi - min);
3395 else if (wi::gt_p (min, ilowi, TYPE_SIGN (TREE_TYPE (lowi))))
3397 lowi = wide_int_to_tree (TREE_TYPE (lowi), min);
3398 mask = wi::lrshift (mask, min - ilowi);
3400 entry_test_needed = false;
3402 else
3403 entry_test_needed = true;
3404 if (candidates.length () >= (entry_test_needed ? 2 : 1))
3406 tree high = wide_int_to_tree (TREE_TYPE (lowi),
3407 wi::to_widest (lowi)
3408 + prec - 1 - wi::clz (mask));
3409 operand_entry *oe = (*ops)[ranges[i].idx];
3410 tree op = oe->op;
3411 gimple *stmt = op ? SSA_NAME_DEF_STMT (op)
3412 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN
3413 (cfun, oe->id));
3414 location_t loc = gimple_location (stmt);
3415 tree optype = op ? TREE_TYPE (op) : boolean_type_node;
3417 /* See if it isn't cheaper to pretend the minimum value of the
3418 range is 0, if the maximum value is small enough.
3419 We can then avoid the subtraction of the minimum value, but the
3420 mask constant could perhaps be more expensive. */
3421 if (compare_tree_int (lowi, 0) > 0
3422 && compare_tree_int (high, prec) < 0
3423 && (entry_test_needed || wi::ltu_p (r.upper_bound (), prec)))
3425 int cost_diff;
3426 HOST_WIDE_INT m = tree_to_uhwi (lowi);
3427 rtx reg = gen_raw_REG (word_mode, 10000);
3428 bool speed_p = optimize_bb_for_speed_p (gimple_bb (stmt));
3429 cost_diff = set_src_cost (gen_rtx_PLUS (word_mode, reg,
3430 GEN_INT (-m)),
3431 word_mode, speed_p);
3432 rtx r = immed_wide_int_const (mask, word_mode);
3433 cost_diff += set_src_cost (gen_rtx_AND (word_mode, reg, r),
3434 word_mode, speed_p);
3435 r = immed_wide_int_const (wi::lshift (mask, m), word_mode);
3436 cost_diff -= set_src_cost (gen_rtx_AND (word_mode, reg, r),
3437 word_mode, speed_p);
3438 if (cost_diff > 0)
3440 mask = wi::lshift (mask, m);
3441 lowi = build_zero_cst (TREE_TYPE (lowi));
3445 tree tem;
3446 if (entry_test_needed)
3448 tem = build_range_check (loc, optype, unshare_expr (exp),
3449 false, lowi, high);
3450 if (tem == NULL_TREE || is_gimple_val (tem))
3451 continue;
3453 else
3454 tem = NULL_TREE;
3455 tree etype = unsigned_type_for (TREE_TYPE (exp));
3456 exp = fold_build2_loc (loc, MINUS_EXPR, etype,
3457 fold_convert_loc (loc, etype, exp),
3458 fold_convert_loc (loc, etype, lowi));
3459 exp = fold_convert_loc (loc, integer_type_node, exp);
3460 tree word_type = lang_hooks.types.type_for_mode (word_mode, 1);
3461 exp = fold_build2_loc (loc, LSHIFT_EXPR, word_type,
3462 build_int_cst (word_type, 1), exp);
3463 exp = fold_build2_loc (loc, BIT_AND_EXPR, word_type, exp,
3464 wide_int_to_tree (word_type, mask));
3465 exp = fold_build2_loc (loc, EQ_EXPR, optype, exp,
3466 build_zero_cst (word_type));
3467 if (is_gimple_val (exp))
3468 continue;
3470 /* The shift might have undefined behavior if TEM is true,
3471 but reassociate_bb isn't prepared to have basic blocks
3472 split when it is running. So, temporarily emit a code
3473 with BIT_IOR_EXPR instead of &&, and fix it up in
3474 branch_fixup. */
3475 gimple_seq seq = NULL;
3476 if (tem)
3478 tem = force_gimple_operand (tem, &seq, true, NULL_TREE);
3479 gcc_assert (TREE_CODE (tem) == SSA_NAME);
3480 gimple_set_visited (SSA_NAME_DEF_STMT (tem), true);
3482 gimple_seq seq2;
3483 exp = force_gimple_operand (exp, &seq2, true, NULL_TREE);
3484 gimple_seq_add_seq_without_update (&seq, seq2);
3485 gcc_assert (TREE_CODE (exp) == SSA_NAME);
3486 gimple_set_visited (SSA_NAME_DEF_STMT (exp), true);
3487 if (tem)
3489 gimple *g = gimple_build_assign (make_ssa_name (optype),
3490 BIT_IOR_EXPR, tem, exp);
3491 gimple_set_location (g, loc);
3492 gimple_seq_add_stmt_without_update (&seq, g);
3493 exp = gimple_assign_lhs (g);
3495 tree val = build_zero_cst (optype);
3496 if (update_range_test (&ranges[i], NULL, candidates.address (),
3497 candidates.length (), opcode, ops, exp,
3498 seq, false, val, val, strict_overflow_p))
3500 any_changes = true;
3501 if (tem)
3502 reassoc_branch_fixups.safe_push (tem);
3504 else
3505 gimple_seq_discard (seq);
3508 return any_changes;
3511 /* Optimize x != 0 && y != 0 && z != 0 into (x | y | z) != 0
3512 and similarly x != -1 && y != -1 && y != -1 into (x & y & z) != -1.
3513 Also, handle x < C && y < C && z < C where C is power of two as
3514 (x | y | z) < C. And also handle signed x < 0 && y < 0 && z < 0
3515 as (x | y | z) < 0. */
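/* A minimal source-level sketch (the function name is made up for the
   example):

     _Bool
     all_nonzero (int x, int y, int z)
     {
       return x != 0 && y != 0 && z != 0;
     }

   is expected to become a single test of the bitwise IOR, roughly
   (x | y | z) != 0, and analogously for the != -1, < C and < 0 forms
   described above.  */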
3517 static bool
3518 optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length,
3519 vec<operand_entry *> *ops,
3520 struct range_entry *ranges)
3522 int i;
3523 unsigned int b;
3524 bool any_changes = false;
3525 auto_vec<int, 128> buckets;
3526 auto_vec<int, 32> chains;
3527 auto_vec<struct range_entry *, 32> candidates;
3529 for (i = first; i < length; i++)
3531 int idx;
3533 if (ranges[i].exp == NULL_TREE
3534 || TREE_CODE (ranges[i].exp) != SSA_NAME
3535 || TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) <= 1
3536 || TREE_CODE (TREE_TYPE (ranges[i].exp)) == BOOLEAN_TYPE)
3537 continue;
3539 if (ranges[i].low != NULL_TREE
3540 && ranges[i].high != NULL_TREE
3541 && ranges[i].in_p
3542 && tree_int_cst_equal (ranges[i].low, ranges[i].high))
3544 idx = !integer_zerop (ranges[i].low);
3545 if (idx && !integer_all_onesp (ranges[i].low))
3546 continue;
3548 else if (ranges[i].high != NULL_TREE
3549 && TREE_CODE (ranges[i].high) == INTEGER_CST
3550 && ranges[i].in_p)
3552 wide_int w = wi::to_wide (ranges[i].high);
3553 int prec = TYPE_PRECISION (TREE_TYPE (ranges[i].exp));
3554 int l = wi::clz (w);
3555 idx = 2;
3556 if (l <= 0
3557 || l >= prec
3558 || w != wi::mask (prec - l, false, prec))
3559 continue;
3560 if (!((TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3561 && ranges[i].low == NULL_TREE)
3562 || (ranges[i].low
3563 && integer_zerop (ranges[i].low))))
3564 continue;
3566 else if (ranges[i].high == NULL_TREE
3567 && ranges[i].low != NULL_TREE
3568 /* Perform this optimization only in the last
3569 reassoc pass, as it interferes with the reassociation
3570 itself, or could also interfere with VRP etc., which might not
3571 be able to virtually undo the optimization. */
3572 && !reassoc_insert_powi_p
3573 && !TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3574 && integer_zerop (ranges[i].low))
3575 idx = 3;
3576 else
3577 continue;
3579 b = TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) * 4 + idx;
3580 if (buckets.length () <= b)
3581 buckets.safe_grow_cleared (b + 1, true);
3582 if (chains.length () <= (unsigned) i)
3583 chains.safe_grow (i + 1, true);
3584 chains[i] = buckets[b];
3585 buckets[b] = i + 1;
3588 FOR_EACH_VEC_ELT (buckets, b, i)
3589 if (i && chains[i - 1])
3591 int j, k = i;
3592 if ((b % 4) == 2)
3594 /* When ranges[X - 1].high + 1 is a power of two,
3595 we need to process the same bucket up to
3596 precision - 1 times, each time splitting the entries
3597 with the same high bound into one chain and the
3598 rest into another one to be processed later. */
3599 int this_prev = i;
3600 int other_prev = 0;
3601 for (j = chains[i - 1]; j; j = chains[j - 1])
3603 if (tree_int_cst_equal (ranges[i - 1].high,
3604 ranges[j - 1].high))
3606 chains[this_prev - 1] = j;
3607 this_prev = j;
3609 else if (other_prev == 0)
3611 buckets[b] = j;
3612 other_prev = j;
3614 else
3616 chains[other_prev - 1] = j;
3617 other_prev = j;
3620 chains[this_prev - 1] = 0;
3621 if (other_prev)
3622 chains[other_prev - 1] = 0;
3623 if (chains[i - 1] == 0)
3625 if (other_prev)
3626 b--;
3627 continue;
3630 for (j = chains[i - 1]; j; j = chains[j - 1])
3632 gimple *gk = SSA_NAME_DEF_STMT (ranges[k - 1].exp);
3633 gimple *gj = SSA_NAME_DEF_STMT (ranges[j - 1].exp);
3634 if (reassoc_stmt_dominates_stmt_p (gk, gj))
3635 k = j;
3637 tree type1 = TREE_TYPE (ranges[k - 1].exp);
3638 tree type2 = NULL_TREE;
3639 bool strict_overflow_p = false;
3640 candidates.truncate (0);
3641 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3642 type1 = pointer_sized_int_node;
3643 for (j = i; j; j = chains[j - 1])
3645 tree type = TREE_TYPE (ranges[j - 1].exp);
3646 strict_overflow_p |= ranges[j - 1].strict_overflow_p;
3647 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3648 type = pointer_sized_int_node;
3649 if ((b % 4) == 3)
3651 /* For the signed < 0 cases, the types should be
3652 really compatible (all signed with the same precision);
3653 instead, put ranges that have a different in_p from
3654 k first. */
3655 if (!useless_type_conversion_p (type1, type))
3656 continue;
3657 if (ranges[j - 1].in_p != ranges[k - 1].in_p)
3658 candidates.safe_push (&ranges[j - 1]);
3659 type2 = type1;
3660 continue;
3662 if (j == k
3663 || useless_type_conversion_p (type1, type))
3665 else if (type2 == NULL_TREE
3666 || useless_type_conversion_p (type2, type))
3668 if (type2 == NULL_TREE)
3669 type2 = type;
3670 candidates.safe_push (&ranges[j - 1]);
3673 unsigned l = candidates.length ();
3674 for (j = i; j; j = chains[j - 1])
3676 tree type = TREE_TYPE (ranges[j - 1].exp);
3677 if (j == k)
3678 continue;
3679 if (POINTER_TYPE_P (type) || TREE_CODE (type) == OFFSET_TYPE)
3680 type = pointer_sized_int_node;
3681 if ((b % 4) == 3)
3683 if (!useless_type_conversion_p (type1, type))
3684 continue;
3685 if (ranges[j - 1].in_p == ranges[k - 1].in_p)
3686 candidates.safe_push (&ranges[j - 1]);
3687 continue;
3689 if (useless_type_conversion_p (type1, type))
3691 else if (type2 == NULL_TREE
3692 || useless_type_conversion_p (type2, type))
3693 continue;
3694 candidates.safe_push (&ranges[j - 1]);
3696 gimple_seq seq = NULL;
3697 tree op = NULL_TREE;
3698 unsigned int id;
3699 struct range_entry *r;
3700 candidates.safe_push (&ranges[k - 1]);
3701 FOR_EACH_VEC_ELT (candidates, id, r)
3703 gimple *g;
3704 enum tree_code code;
3705 if (id == 0)
3707 op = r->exp;
3708 continue;
3710 if (id == l
3711 || POINTER_TYPE_P (TREE_TYPE (op))
3712 || TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3714 code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR;
3715 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3716 if (code == BIT_NOT_EXPR
3717 && TREE_CODE (TREE_TYPE (op)) == OFFSET_TYPE)
3719 g = gimple_build_assign (make_ssa_name (type3),
3720 NOP_EXPR, op);
3721 gimple_seq_add_stmt_without_update (&seq, g);
3722 op = gimple_assign_lhs (g);
3724 g = gimple_build_assign (make_ssa_name (type3), code, op);
3725 gimple_seq_add_stmt_without_update (&seq, g);
3726 op = gimple_assign_lhs (g);
3728 tree type = TREE_TYPE (r->exp);
3729 tree exp = r->exp;
3730 if (POINTER_TYPE_P (type)
3731 || TREE_CODE (type) == OFFSET_TYPE
3732 || (id >= l && !useless_type_conversion_p (type1, type)))
3734 tree type3 = id >= l ? type1 : pointer_sized_int_node;
3735 g = gimple_build_assign (make_ssa_name (type3), NOP_EXPR, exp);
3736 gimple_seq_add_stmt_without_update (&seq, g);
3737 exp = gimple_assign_lhs (g);
3739 if ((b % 4) == 3)
3740 code = r->in_p ? BIT_IOR_EXPR : BIT_AND_EXPR;
3741 else
3742 code = (b % 4) == 1 ? BIT_AND_EXPR : BIT_IOR_EXPR;
3743 g = gimple_build_assign (make_ssa_name (id >= l ? type1 : type2),
3744 code, op, exp);
3745 gimple_seq_add_stmt_without_update (&seq, g);
3746 op = gimple_assign_lhs (g);
3748 type1 = TREE_TYPE (ranges[k - 1].exp);
3749 if (POINTER_TYPE_P (type1) || TREE_CODE (type1) == OFFSET_TYPE)
3751 gimple *g
3752 = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op);
3753 gimple_seq_add_stmt_without_update (&seq, g);
3754 op = gimple_assign_lhs (g);
3756 candidates.pop ();
3757 if (update_range_test (&ranges[k - 1], NULL, candidates.address (),
3758 candidates.length (), opcode, ops, op,
3759 seq, ranges[k - 1].in_p, ranges[k - 1].low,
3760 ranges[k - 1].high, strict_overflow_p))
3761 any_changes = true;
3762 else
3763 gimple_seq_discard (seq);
3764 if ((b % 4) == 2 && buckets[b] != i)
3765 /* There is more work to do for this bucket. */
3766 b--;
3769 return any_changes;
3772 /* Attempt to optimize for signed a and b where b is known to be >= 0:
3773 a >= 0 && a < b into (unsigned) a < (unsigned) b
3774 a >= 0 && a <= b into (unsigned) a <= (unsigned) b */
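/* A minimal source-level sketch (names made up for the example; the
   masking is only there so that b is provably non-negative):

     _Bool
     in_bound (int a, int c)
     {
       int b = c & 0xff;  // known to be >= 0
       return a >= 0 && a < b;
     }

   The two tests are expected to merge into a single unsigned compare,
   roughly (unsigned) a < (unsigned) b.  */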
3776 static bool
3777 optimize_range_tests_var_bound (enum tree_code opcode, int first, int length,
3778 vec<operand_entry *> *ops,
3779 struct range_entry *ranges,
3780 basic_block first_bb)
3782 int i;
3783 bool any_changes = false;
3784 hash_map<tree, int> *map = NULL;
3786 for (i = first; i < length; i++)
3788 if (ranges[i].exp == NULL_TREE
3789 || TREE_CODE (ranges[i].exp) != SSA_NAME
3790 || !ranges[i].in_p)
3791 continue;
3793 tree type = TREE_TYPE (ranges[i].exp);
3794 if (!INTEGRAL_TYPE_P (type)
3795 || TYPE_UNSIGNED (type)
3796 || ranges[i].low == NULL_TREE
3797 || !integer_zerop (ranges[i].low)
3798 || ranges[i].high != NULL_TREE)
3799 continue;
3800 /* EXP >= 0 here. */
3801 if (map == NULL)
3802 map = new hash_map <tree, int>;
3803 map->put (ranges[i].exp, i);
3806 if (map == NULL)
3807 return false;
3809 for (i = 0; i < length; i++)
3811 bool in_p = ranges[i].in_p;
3812 if (ranges[i].low == NULL_TREE
3813 || ranges[i].high == NULL_TREE)
3814 continue;
3815 if (!integer_zerop (ranges[i].low)
3816 || !integer_zerop (ranges[i].high))
3818 if (ranges[i].exp
3819 && TYPE_PRECISION (TREE_TYPE (ranges[i].exp)) == 1
3820 && TYPE_UNSIGNED (TREE_TYPE (ranges[i].exp))
3821 && integer_onep (ranges[i].low)
3822 && integer_onep (ranges[i].high))
3823 in_p = !in_p;
3824 else
3825 continue;
3828 gimple *stmt;
3829 tree_code ccode;
3830 tree rhs1, rhs2;
3831 if (ranges[i].exp)
3833 if (TREE_CODE (ranges[i].exp) != SSA_NAME)
3834 continue;
3835 stmt = SSA_NAME_DEF_STMT (ranges[i].exp);
3836 if (!is_gimple_assign (stmt))
3837 continue;
3838 ccode = gimple_assign_rhs_code (stmt);
3839 rhs1 = gimple_assign_rhs1 (stmt);
3840 rhs2 = gimple_assign_rhs2 (stmt);
3842 else
3844 operand_entry *oe = (*ops)[ranges[i].idx];
3845 stmt = last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id));
3846 if (gimple_code (stmt) != GIMPLE_COND)
3847 continue;
3848 ccode = gimple_cond_code (stmt);
3849 rhs1 = gimple_cond_lhs (stmt);
3850 rhs2 = gimple_cond_rhs (stmt);
3853 if (TREE_CODE (rhs1) != SSA_NAME
3854 || rhs2 == NULL_TREE
3855 || TREE_CODE (rhs2) != SSA_NAME)
3856 continue;
3858 switch (ccode)
3860 case GT_EXPR:
3861 case GE_EXPR:
3862 case LT_EXPR:
3863 case LE_EXPR:
3864 break;
3865 default:
3866 continue;
3868 if (in_p)
3869 ccode = invert_tree_comparison (ccode, false);
3870 switch (ccode)
3872 case GT_EXPR:
3873 case GE_EXPR:
3874 std::swap (rhs1, rhs2);
3875 ccode = swap_tree_comparison (ccode);
3876 break;
3877 case LT_EXPR:
3878 case LE_EXPR:
3879 break;
3880 default:
3881 gcc_unreachable ();
3884 int *idx = map->get (rhs1);
3885 if (idx == NULL)
3886 continue;
3888 /* maybe_optimize_range_tests allows statements without side-effects
3889 in the basic blocks as long as they are consumed in the same bb.
3890 Make sure rhs2's def stmt is not among them, otherwise we can't
3891 safely use get_nonzero_bits on it. E.g. in:
3892 # RANGE [-83, 1] NONZERO 173
3893 # k_32 = PHI <k_47(13), k_12(9)>
3895 if (k_32 >= 0)
3896 goto <bb 5>; [26.46%]
3897 else
3898 goto <bb 9>; [73.54%]
3900 <bb 5> [local count: 140323371]:
3901 # RANGE [0, 1] NONZERO 1
3902 _5 = (int) k_32;
3903 # RANGE [0, 4] NONZERO 4
3904 _21 = _5 << 2;
3905 # RANGE [0, 4] NONZERO 4
3906 iftmp.0_44 = (char) _21;
3907 if (k_32 < iftmp.0_44)
3908 goto <bb 6>; [84.48%]
3909 else
3910 goto <bb 9>; [15.52%]
3911 the ranges on _5/_21/iftmp.0_44 are flow sensitive and assume that
3912 k_32 >= 0. If we'd optimize k_32 >= 0 to true and k_32 < iftmp.0_44
3913 to (unsigned) k_32 < (unsigned) iftmp.0_44, then we would execute
3914 those stmts even for negative k_32 and the value ranges would no
3915 longer be guaranteed and so the optimization would be invalid. */
3916 while (opcode == ERROR_MARK)
3918 gimple *g = SSA_NAME_DEF_STMT (rhs2);
3919 basic_block bb2 = gimple_bb (g);
3920 if (bb2
3921 && bb2 != first_bb
3922 && dominated_by_p (CDI_DOMINATORS, bb2, first_bb))
3924 /* As an exception, handle a few common cases. */
3925 if (gimple_assign_cast_p (g)
3926 && INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (g))))
3928 tree op0 = gimple_assign_rhs1 (g);
3929 if (TYPE_UNSIGNED (TREE_TYPE (op0))
3930 && (TYPE_PRECISION (TREE_TYPE (rhs2))
3931 > TYPE_PRECISION (TREE_TYPE (op0))))
3932 /* Zero-extension is always ok. */
3933 break;
3934 else if (TYPE_PRECISION (TREE_TYPE (rhs2))
3935 == TYPE_PRECISION (TREE_TYPE (op0))
3936 && TREE_CODE (op0) == SSA_NAME)
3938 /* Cast from signed to unsigned or vice versa. Retry
3939 with the op0 as new rhs2. */
3940 rhs2 = op0;
3941 continue;
3944 else if (is_gimple_assign (g)
3945 && gimple_assign_rhs_code (g) == BIT_AND_EXPR
3946 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
3947 && !wi::neg_p (wi::to_wide (gimple_assign_rhs2 (g))))
3948 /* Masking with INTEGER_CST with MSB clear is always ok
3949 too. */
3950 break;
3951 rhs2 = NULL_TREE;
3953 break;
3955 if (rhs2 == NULL_TREE)
3956 continue;
3958 wide_int nz = get_nonzero_bits (rhs2);
3959 if (wi::neg_p (nz))
3960 continue;
3962 /* We have EXP < RHS2 or EXP <= RHS2 where EXP >= 0
3963 and RHS2 is known to be >= 0. */
3964 tree utype = unsigned_type_for (TREE_TYPE (rhs1));
3966 enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON;
3967 if ((ranges[*idx].strict_overflow_p
3968 || ranges[i].strict_overflow_p)
3969 && issue_strict_overflow_warning (wc))
3970 warning_at (gimple_location (stmt), OPT_Wstrict_overflow,
3971 "assuming signed overflow does not occur "
3972 "when simplifying range test");
3974 if (dump_file && (dump_flags & TDF_DETAILS))
3976 struct range_entry *r = &ranges[*idx];
3977 fprintf (dump_file, "Optimizing range test ");
3978 print_generic_expr (dump_file, r->exp);
3979 fprintf (dump_file, " +[");
3980 print_generic_expr (dump_file, r->low);
3981 fprintf (dump_file, ", ");
3982 print_generic_expr (dump_file, r->high);
3983 fprintf (dump_file, "] and comparison ");
3984 print_generic_expr (dump_file, rhs1);
3985 fprintf (dump_file, " %s ", op_symbol_code (ccode));
3986 print_generic_expr (dump_file, rhs2);
3987 fprintf (dump_file, "\n into (");
3988 print_generic_expr (dump_file, utype);
3989 fprintf (dump_file, ") ");
3990 print_generic_expr (dump_file, rhs1);
3991 fprintf (dump_file, " %s (", op_symbol_code (ccode));
3992 print_generic_expr (dump_file, utype);
3993 fprintf (dump_file, ") ");
3994 print_generic_expr (dump_file, rhs2);
3995 fprintf (dump_file, "\n");
3998 operand_entry *oe = (*ops)[ranges[i].idx];
3999 ranges[i].in_p = 0;
4000 if (opcode == BIT_IOR_EXPR
4001 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4003 ranges[i].in_p = 1;
4004 ccode = invert_tree_comparison (ccode, false);
4007 unsigned int uid = gimple_uid (stmt);
4008 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4009 gimple *g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs1);
4010 gimple_set_uid (g, uid);
4011 rhs1 = gimple_assign_lhs (g);
4012 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4013 if (!useless_type_conversion_p (utype, TREE_TYPE (rhs2)))
4015 g = gimple_build_assign (make_ssa_name (utype), NOP_EXPR, rhs2);
4016 gimple_set_uid (g, uid);
4017 rhs2 = gimple_assign_lhs (g);
4018 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4020 if (tree_swap_operands_p (rhs1, rhs2))
4022 std::swap (rhs1, rhs2);
4023 ccode = swap_tree_comparison (ccode);
4025 if (gimple_code (stmt) == GIMPLE_COND)
4027 gcond *c = as_a <gcond *> (stmt);
4028 gimple_cond_set_code (c, ccode);
4029 gimple_cond_set_lhs (c, rhs1);
4030 gimple_cond_set_rhs (c, rhs2);
4031 update_stmt (stmt);
4033 else
4035 tree ctype = oe->op ? TREE_TYPE (oe->op) : boolean_type_node;
4036 if (!INTEGRAL_TYPE_P (ctype)
4037 || (TREE_CODE (ctype) != BOOLEAN_TYPE
4038 && TYPE_PRECISION (ctype) != 1))
4039 ctype = boolean_type_node;
4040 g = gimple_build_assign (make_ssa_name (ctype), ccode, rhs1, rhs2);
4041 gimple_set_uid (g, uid);
4042 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4043 if (oe->op && ctype != TREE_TYPE (oe->op))
4045 g = gimple_build_assign (make_ssa_name (TREE_TYPE (oe->op)),
4046 NOP_EXPR, gimple_assign_lhs (g));
4047 gimple_set_uid (g, uid);
4048 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4050 ranges[i].exp = gimple_assign_lhs (g);
4051 oe->op = ranges[i].exp;
4052 ranges[i].low = build_zero_cst (TREE_TYPE (ranges[i].exp));
4053 ranges[i].high = ranges[i].low;
4055 ranges[i].strict_overflow_p = false;
4056 oe = (*ops)[ranges[*idx].idx];
4057 /* Now change all the other range test immediate uses, so that
4058 those tests will be optimized away. */
4059 if (opcode == ERROR_MARK)
4061 if (oe->op)
4062 oe->op = build_int_cst (TREE_TYPE (oe->op),
4063 oe->rank == BIT_IOR_EXPR ? 0 : 1);
4064 else
4065 oe->op = (oe->rank == BIT_IOR_EXPR
4066 ? boolean_false_node : boolean_true_node);
4068 else
4069 oe->op = error_mark_node;
4070 ranges[*idx].exp = NULL_TREE;
4071 ranges[*idx].low = NULL_TREE;
4072 ranges[*idx].high = NULL_TREE;
4073 any_changes = true;
4076 delete map;
4077 return any_changes;
4080 /* Optimize range tests, similarly to how fold_range_test optimizes
4081 it on trees. The tree code for the binary
4082 operation between all the operands is OPCODE.
4083 If OPCODE is ERROR_MARK, optimize_range_tests is called from within
4084 maybe_optimize_range_tests for inter-bb range optimization.
4085 In that case if oe->op is NULL, oe->id is the index of the bb whose
4086 GIMPLE_COND is && or ||ed into the test, and oe->rank says
4087 the actual opcode.
4088 FIRST_BB is the first basic block if OPCODE is ERROR_MARK. */
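/* A purely illustrative source-level example of the merging attempted
   here (hypothetical values):

     if (a < 0 || a > 9)

   Both comparisons test the same SSA_NAME, their ranges can be merged
   by merge_ranges, and update_range_test can then emit one check that
   is roughly equivalent to

     if ((unsigned) a > 9)
*/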
4090 static bool
4091 optimize_range_tests (enum tree_code opcode,
4092 vec<operand_entry *> *ops, basic_block first_bb)
4094 unsigned int length = ops->length (), i, j, first;
4095 operand_entry *oe;
4096 struct range_entry *ranges;
4097 bool any_changes = false;
4099 if (length == 1)
4100 return false;
4102 ranges = XNEWVEC (struct range_entry, length);
4103 for (i = 0; i < length; i++)
4105 oe = (*ops)[i];
4106 ranges[i].idx = i;
4107 init_range_entry (ranges + i, oe->op,
4108 oe->op
4109 ? NULL
4110 : last_nondebug_stmt (BASIC_BLOCK_FOR_FN (cfun, oe->id)));
4111 /* For | invert it now, we will invert it again before emitting
4112 the optimized expression. */
4113 if (opcode == BIT_IOR_EXPR
4114 || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR))
4115 ranges[i].in_p = !ranges[i].in_p;
4118 qsort (ranges, length, sizeof (*ranges), range_entry_cmp);
4119 for (i = 0; i < length; i++)
4120 if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME)
4121 break;
4123 /* Try to merge ranges. */
4124 for (first = i; i < length; i++)
4126 tree low = ranges[i].low;
4127 tree high = ranges[i].high;
4128 int in_p = ranges[i].in_p;
4129 bool strict_overflow_p = ranges[i].strict_overflow_p;
4130 int update_fail_count = 0;
4132 for (j = i + 1; j < length; j++)
4134 if (ranges[i].exp != ranges[j].exp)
4135 break;
4136 if (!merge_ranges (&in_p, &low, &high, in_p, low, high,
4137 ranges[j].in_p, ranges[j].low, ranges[j].high))
4138 break;
4139 strict_overflow_p |= ranges[j].strict_overflow_p;
4142 if (j == i + 1)
4143 continue;
4145 if (update_range_test (ranges + i, ranges + i + 1, NULL, j - i - 1,
4146 opcode, ops, ranges[i].exp, NULL, in_p,
4147 low, high, strict_overflow_p))
4149 i = j - 1;
4150 any_changes = true;
4152 /* Avoid quadratic complexity if all merge_ranges calls would succeed,
4153 while update_range_test would fail. */
4154 else if (update_fail_count == 64)
4155 i = j - 1;
4156 else
4157 ++update_fail_count;
4160 any_changes |= optimize_range_tests_1 (opcode, first, length, true,
4161 ops, ranges);
4163 if (BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2)
4164 any_changes |= optimize_range_tests_1 (opcode, first, length, false,
4165 ops, ranges);
4166 if (lshift_cheap_p (optimize_function_for_speed_p (cfun)))
4167 any_changes |= optimize_range_tests_to_bit_test (opcode, first, length,
4168 ops, ranges);
4169 any_changes |= optimize_range_tests_var_bound (opcode, first, length, ops,
4170 ranges, first_bb);
4171 any_changes |= optimize_range_tests_cmp_bitwise (opcode, first, length,
4172 ops, ranges);
4174 if (any_changes && opcode != ERROR_MARK)
4176 j = 0;
4177 FOR_EACH_VEC_ELT (*ops, i, oe)
4179 if (oe->op == error_mark_node)
4180 continue;
4181 else if (i != j)
4182 (*ops)[j] = oe;
4183 j++;
4185 ops->truncate (j);
4188 XDELETEVEC (ranges);
4189 return any_changes;
4192 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
4193 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
4194 otherwise the comparison code. TYPE is a return value that is set
4195 to the type of the comparison. */
4197 static tree_code
4198 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
4199 tree *lhs, tree *rhs, gassign **vcond)
4201 if (TREE_CODE (var) != SSA_NAME)
4202 return ERROR_MARK;
4204 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
4205 if (stmt == NULL)
4206 return ERROR_MARK;
4207 if (vcond)
4208 *vcond = stmt;
4210 /* ??? If we start creating more COND_EXPR, we could perform
4211 this same optimization with them. For now, simplify. */
4212 if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
4213 return ERROR_MARK;
4215 tree cond = gimple_assign_rhs1 (stmt);
4216 tree_code cmp = TREE_CODE (cond);
4217 if (cmp != SSA_NAME)
4218 return ERROR_MARK;
4220 gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
4221 if (assign == NULL
4222 || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
4223 return ERROR_MARK;
4225 cmp = gimple_assign_rhs_code (assign);
4226 if (lhs)
4227 *lhs = gimple_assign_rhs1 (assign);
4228 if (rhs)
4229 *rhs = gimple_assign_rhs2 (assign);
4231 /* ??? For now, allow only canonical true and false result vectors.
4232 We could expand this to other constants should the need arise,
4233 but at the moment we don't create them. */
4234 tree t = gimple_assign_rhs2 (stmt);
4235 tree f = gimple_assign_rhs3 (stmt);
4236 bool inv;
4237 if (integer_all_onesp (t))
4238 inv = false;
4239 else if (integer_all_onesp (f))
4241 cmp = invert_tree_comparison (cmp, false);
4242 inv = true;
4244 else
4245 return ERROR_MARK;
4246 if (!integer_zerop (f))
4247 return ERROR_MARK;
4249 /* Success! */
4250 if (rets)
4251 *rets = assign;
4252 if (reti)
4253 *reti = inv;
4254 if (type)
4255 *type = TREE_TYPE (cond);
4256 return cmp;
4259 /* Optimize the condition of VEC_COND_EXPRs which have been combined
4260 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
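/* Illustrative sketch (made-up GIMPLE): given two operands that are
   VEC_COND_EXPRs with canonical -1/0 result vectors, e.g.

     c1_1 = x_2 <= y_3;
     v1_4 = VEC_COND_EXPR <c1_1, { -1, ... }, { 0, ... }>;
     c2_5 = x_2 >= y_3;
     v2_6 = VEC_COND_EXPR <c2_5, { -1, ... }, { 0, ... }>;
     _7 = v1_4 & v2_6;

   the two comparisons may be combined by maybe_fold_and_comparisons
   into a single condition (here x_2 == y_3) feeding one VEC_COND_EXPR,
   and the second operand is dropped from the ops vector.  */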
4262 static bool
4263 optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
4265 unsigned int length = ops->length (), i, j;
4266 bool any_changes = false;
4268 if (length == 1)
4269 return false;
4271 for (i = 0; i < length; ++i)
4273 tree elt0 = (*ops)[i]->op;
4275 gassign *stmt0, *vcond0;
4276 bool invert;
4277 tree type, lhs0, rhs0;
4278 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
4279 &rhs0, &vcond0);
4280 if (cmp0 == ERROR_MARK)
4281 continue;
4283 for (j = i + 1; j < length; ++j)
4285 tree &elt1 = (*ops)[j]->op;
4287 gassign *stmt1, *vcond1;
4288 tree lhs1, rhs1;
4289 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
4290 &rhs1, &vcond1);
4291 if (cmp1 == ERROR_MARK)
4292 continue;
4294 tree comb;
4295 if (opcode == BIT_AND_EXPR)
4296 comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
4297 cmp1, lhs1, rhs1);
4298 else if (opcode == BIT_IOR_EXPR)
4299 comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
4300 cmp1, lhs1, rhs1);
4301 else
4302 gcc_unreachable ();
4303 if (comb == NULL)
4304 continue;
4306 /* Success! */
4307 if (dump_file && (dump_flags & TDF_DETAILS))
4309 fprintf (dump_file, "Transforming ");
4310 print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
4311 fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
4312 print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
4313 fprintf (dump_file, " into ");
4314 print_generic_expr (dump_file, comb);
4315 fputc ('\n', dump_file);
4318 gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
4319 tree exp = force_gimple_operand_gsi (&gsi, comb, true, NULL_TREE,
4320 true, GSI_SAME_STMT);
4321 if (invert)
4322 swap_ssa_operands (vcond0, gimple_assign_rhs2_ptr (vcond0),
4323 gimple_assign_rhs3_ptr (vcond0));
4324 gimple_assign_set_rhs1 (vcond0, exp);
4325 update_stmt (vcond0);
4327 elt1 = error_mark_node;
4328 any_changes = true;
4332 if (any_changes)
4334 operand_entry *oe;
4335 j = 0;
4336 FOR_EACH_VEC_ELT (*ops, i, oe)
4338 if (oe->op == error_mark_node)
4339 continue;
4340 else if (i != j)
4341 (*ops)[j] = oe;
4342 j++;
4344 ops->truncate (j);
4347 return any_changes;
4350 /* Return true if STMT is a cast like:
4351 <bb N>:
4353 _123 = (int) _234;
4355 <bb M>:
4356 # _345 = PHI <_123(N), 1(...), 1(...)>
4357 where _234 has bool type, _123 has a single use and
4358 bb N has a single successor M. This is commonly used in
4359 the last block of a range test.
4361 Also return true if STMT is tcc_compare like:
4362 <bb N>:
4364 _234 = a_2(D) == 2;
4366 <bb M>:
4367 # _345 = PHI <_234(N), 1(...), 1(...)>
4368 _346 = (int) _345;
4369 where _234 has bool type, a single use and
4370 bb N has a single successor M. This is commonly used in
4371 the last block of a range test. */
4373 static bool
4374 final_range_test_p (gimple *stmt)
4376 basic_block bb, rhs_bb, lhs_bb;
4377 edge e;
4378 tree lhs, rhs;
4379 use_operand_p use_p;
4380 gimple *use_stmt;
4382 if (!gimple_assign_cast_p (stmt)
4383 && (!is_gimple_assign (stmt)
4384 || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4385 != tcc_comparison)))
4386 return false;
4387 bb = gimple_bb (stmt);
4388 if (!single_succ_p (bb))
4389 return false;
4390 e = single_succ_edge (bb);
4391 if (e->flags & EDGE_COMPLEX)
4392 return false;
4394 lhs = gimple_assign_lhs (stmt);
4395 rhs = gimple_assign_rhs1 (stmt);
4396 if (gimple_assign_cast_p (stmt)
4397 && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4398 || TREE_CODE (rhs) != SSA_NAME
4399 || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
4400 return false;
4402 if (!gimple_assign_cast_p (stmt)
4403 && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
4404 return false;
4406 /* Test whether lhs is consumed only by a PHI in the only successor bb. */
4407 if (!single_imm_use (lhs, &use_p, &use_stmt))
4408 return false;
4410 if (gimple_code (use_stmt) != GIMPLE_PHI
4411 || gimple_bb (use_stmt) != e->dest)
4412 return false;
4414 /* And that the rhs is defined in the same loop. */
4415 if (gimple_assign_cast_p (stmt))
4417 if (TREE_CODE (rhs) != SSA_NAME
4418 || !(rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)))
4419 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
4420 return false;
4422 else
4424 if (TREE_CODE (lhs) != SSA_NAME
4425 || !(lhs_bb = gimple_bb (SSA_NAME_DEF_STMT (lhs)))
4426 || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), lhs_bb))
4427 return false;
4430 return true;
4433 /* Return true if BB is suitable basic block for inter-bb range test
4434 optimization. If BACKWARD is true, BB should be the only predecessor
4435 of TEST_BB, and *OTHER_BB is either NULL and filled by the routine,
4436 or compared with to find a common basic block to which all conditions
4437 branch if true resp. false. If BACKWARD is false, TEST_BB should
4438 be the only predecessor of BB. *TEST_SWAPPED_P is set to true if
4439 TEST_BB is a bb ending in condition where the edge to non-*OTHER_BB
4440 block points to an empty block that falls through into *OTHER_BB and
4441 the phi args match that path. */
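/* A small illustrative CFG (block numbers made up):

     <bb 2>: if (x_1 == 3) goto <bb 9>; else goto <bb 3>;
     <bb 3>: if (x_1 == 5) goto <bb 9>; else goto <bb 4>;

   Here suitable_cond_bb (<bb 2>, <bb 3>, &other_bb, NULL, true) can
   succeed with *OTHER_BB set to <bb 9>, provided the PHI arguments in
   <bb 9> coming from <bb 2> and <bb 3> agree.  */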
4443 static bool
4444 suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb,
4445 bool *test_swapped_p, bool backward)
4447 edge_iterator ei, ei2;
4448 edge e, e2;
4449 gimple *stmt;
4450 gphi_iterator gsi;
4451 bool other_edge_seen = false;
4452 bool is_cond;
4454 if (test_bb == bb)
4455 return false;
4456 /* Check last stmt first. */
4457 stmt = last_nondebug_stmt (bb);
4458 if (stmt == NULL
4459 || (gimple_code (stmt) != GIMPLE_COND
4460 && (backward || !final_range_test_p (stmt)))
4461 || gimple_visited_p (stmt)
4462 || stmt_could_throw_p (cfun, stmt)
4463 || *other_bb == bb)
4464 return false;
4465 is_cond = gimple_code (stmt) == GIMPLE_COND;
4466 if (is_cond)
4468 /* If last stmt is GIMPLE_COND, verify that one of the succ edges
4469 goes to the next bb (if BACKWARD, it is TEST_BB), and the other
4470 to *OTHER_BB (if not set yet, try to find it out). */
4471 if (EDGE_COUNT (bb->succs) != 2)
4472 return false;
4473 FOR_EACH_EDGE (e, ei, bb->succs)
4475 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4476 return false;
4477 if (e->dest == test_bb)
4479 if (backward)
4480 continue;
4481 else
4482 return false;
4484 if (e->dest == bb)
4485 return false;
4486 if (*other_bb == NULL)
4488 FOR_EACH_EDGE (e2, ei2, test_bb->succs)
4489 if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)))
4490 return false;
4491 else if (e->dest == e2->dest)
4492 *other_bb = e->dest;
4493 if (*other_bb == NULL)
4494 return false;
4496 if (e->dest == *other_bb)
4497 other_edge_seen = true;
4498 else if (backward)
4499 return false;
4501 if (*other_bb == NULL || !other_edge_seen)
4502 return false;
4504 else if (single_succ (bb) != *other_bb)
4505 return false;
4507 /* Now check all PHIs of *OTHER_BB. */
4508 e = find_edge (bb, *other_bb);
4509 e2 = find_edge (test_bb, *other_bb);
4510 retry:;
4511 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
4513 gphi *phi = gsi.phi ();
4514 /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments
4515 corresponding to BB and TEST_BB predecessor must be the same. */
4516 if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx),
4517 gimple_phi_arg_def (phi, e2->dest_idx), 0))
4519 /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND,
4520 one of the PHIs should have the lhs of the last stmt in
4521 that block as PHI arg and that PHI should have 0 or 1
4522 corresponding to it in all other range test basic blocks
4523 considered. */
4524 if (!is_cond)
4526 if (gimple_phi_arg_def (phi, e->dest_idx)
4527 == gimple_assign_lhs (stmt)
4528 && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx))
4529 || integer_onep (gimple_phi_arg_def (phi,
4530 e2->dest_idx))))
4531 continue;
4533 else
4535 gimple *test_last = last_nondebug_stmt (test_bb);
4536 if (gimple_code (test_last) == GIMPLE_COND)
4538 if (backward ? e2->src != test_bb : e->src != bb)
4539 return false;
4541 /* For last_bb, handle also:
4542 if (x_3(D) == 3)
4543 goto <bb 6>; [34.00%]
4544 else
4545 goto <bb 7>; [66.00%]
4547 <bb 6> [local count: 79512730]:
4549 <bb 7> [local count: 1073741824]:
4550 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4551 where bb 7 is *OTHER_BB, but the PHI values from the
4552 earlier bbs match the path through the empty bb
4553 in between. */
4554 edge e3;
4555 if (backward)
4556 e3 = EDGE_SUCC (test_bb,
4557 e2 == EDGE_SUCC (test_bb, 0) ? 1 : 0);
4558 else
4559 e3 = EDGE_SUCC (bb,
4560 e == EDGE_SUCC (bb, 0) ? 1 : 0);
4561 if (empty_block_p (e3->dest)
4562 && single_succ_p (e3->dest)
4563 && single_succ (e3->dest) == *other_bb
4564 && single_pred_p (e3->dest)
4565 && single_succ_edge (e3->dest)->flags == EDGE_FALLTHRU)
4567 if (backward)
4568 e2 = single_succ_edge (e3->dest);
4569 else
4570 e = single_succ_edge (e3->dest);
4571 if (test_swapped_p)
4572 *test_swapped_p = true;
4573 goto retry;
4576 else if (gimple_phi_arg_def (phi, e2->dest_idx)
4577 == gimple_assign_lhs (test_last)
4578 && (integer_zerop (gimple_phi_arg_def (phi,
4579 e->dest_idx))
4580 || integer_onep (gimple_phi_arg_def (phi,
4581 e->dest_idx))))
4582 continue;
4585 return false;
4588 return true;
4591 /* Return true if BB doesn't have side-effects that would disallow
4592 range test optimization, all SSA_NAMEs set in the bb are consumed
4593 in the bb and there are no PHIs. */
4595 bool
4596 no_side_effect_bb (basic_block bb)
4598 gimple_stmt_iterator gsi;
4599 gimple *last;
4601 if (!gimple_seq_empty_p (phi_nodes (bb)))
4602 return false;
4603 last = last_nondebug_stmt (bb);
4604 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4606 gimple *stmt = gsi_stmt (gsi);
4607 tree lhs;
4608 imm_use_iterator imm_iter;
4609 use_operand_p use_p;
4611 if (is_gimple_debug (stmt))
4612 continue;
4613 if (gimple_has_side_effects (stmt))
4614 return false;
4615 if (stmt == last)
4616 return true;
4617 if (!is_gimple_assign (stmt))
4618 return false;
4619 lhs = gimple_assign_lhs (stmt);
4620 if (TREE_CODE (lhs) != SSA_NAME)
4621 return false;
4622 if (gimple_assign_rhs_could_trap_p (stmt))
4623 return false;
4624 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
4626 gimple *use_stmt = USE_STMT (use_p);
4627 if (is_gimple_debug (use_stmt))
4628 continue;
4629 if (gimple_bb (use_stmt) != bb)
4630 return false;
4633 return false;
4636 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
4637 return true and fill in *OPS recursively. */
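/* Illustrative sketch (made-up SSA names): for

     _5 = _1 | _2;
     _6 = _5 | _3;

   get_ops (_6, BIT_IOR_EXPR, &ops, loop) would recurse through _5 and
   push the leaf operands _1, _2 and _3 into OPS, assuming each
   intermediate result has a single use and is reassociable.  */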
4639 static bool
4640 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
4641 class loop *loop)
4643 gimple *stmt = SSA_NAME_DEF_STMT (var);
4644 tree rhs[2];
4645 int i;
4647 if (!is_reassociable_op (stmt, code, loop))
4648 return false;
4650 rhs[0] = gimple_assign_rhs1 (stmt);
4651 rhs[1] = gimple_assign_rhs2 (stmt);
4652 gimple_set_visited (stmt, true);
4653 for (i = 0; i < 2; i++)
4654 if (TREE_CODE (rhs[i]) == SSA_NAME
4655 && !get_ops (rhs[i], code, ops, loop)
4656 && has_single_use (rhs[i]))
4658 operand_entry *oe = operand_entry_pool.allocate ();
4660 oe->op = rhs[i];
4661 oe->rank = code;
4662 oe->id = 0;
4663 oe->count = 1;
4664 oe->stmt_to_insert = NULL;
4665 ops->safe_push (oe);
4667 return true;
4670 /* Find the ops that were added by get_ops starting from VAR, see if
4671 they were changed during update_range_test and if yes, create new
4672 stmts. */
4674 static tree
4675 update_ops (tree var, enum tree_code code, const vec<operand_entry *> &ops,
4676 unsigned int *pidx, class loop *loop)
4678 gimple *stmt = SSA_NAME_DEF_STMT (var);
4679 tree rhs[4];
4680 int i;
4682 if (!is_reassociable_op (stmt, code, loop))
4683 return NULL;
4685 rhs[0] = gimple_assign_rhs1 (stmt);
4686 rhs[1] = gimple_assign_rhs2 (stmt);
4687 rhs[2] = rhs[0];
4688 rhs[3] = rhs[1];
4689 for (i = 0; i < 2; i++)
4690 if (TREE_CODE (rhs[i]) == SSA_NAME)
4692 rhs[2 + i] = update_ops (rhs[i], code, ops, pidx, loop);
4693 if (rhs[2 + i] == NULL_TREE)
4695 if (has_single_use (rhs[i]))
4696 rhs[2 + i] = ops[(*pidx)++]->op;
4697 else
4698 rhs[2 + i] = rhs[i];
4701 if ((rhs[2] != rhs[0] || rhs[3] != rhs[1])
4702 && (rhs[2] != rhs[1] || rhs[3] != rhs[0]))
4704 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4705 var = make_ssa_name (TREE_TYPE (var));
4706 gassign *g = gimple_build_assign (var, gimple_assign_rhs_code (stmt),
4707 rhs[2], rhs[3]);
4708 gimple_set_uid (g, gimple_uid (stmt));
4709 gimple_set_visited (g, true);
4710 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4711 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4712 if (fold_stmt_inplace (&gsi2))
4713 update_stmt (g);
4715 return var;
4718 /* Structure to track the initial value passed to get_ops and
4719 the range in the ops vector for each basic block. */
4721 struct inter_bb_range_test_entry
4723 tree op;
4724 unsigned int first_idx, last_idx;
4727 /* Inter-bb range test optimization.
4729 Returns TRUE if a gimple conditional is optimized into a constant true/false,
4730 otherwise returns FALSE.
4732 This indicates to the caller that it should run a CFG cleanup pass
4733 once reassociation is completed. */
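/* As a rough source-level illustration of what this catches
   (hypothetical code):

     if (x == 3) goto out;
     if (x == 5) goto out;
     if (x == 7) goto out;

   Each condition lives in its own basic block, all of them branch to
   the same destination, and the range test machinery above can merge
   them (e.g. into a bit test), after which some of the GIMPLE_CONDs
   collapse to constant true/false and a CFG cleanup is requested.  */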
4735 static bool
4736 maybe_optimize_range_tests (gimple *stmt)
4738 basic_block first_bb = gimple_bb (stmt);
4739 basic_block last_bb = first_bb;
4740 basic_block other_bb = NULL;
4741 basic_block bb;
4742 edge_iterator ei;
4743 edge e;
4744 auto_vec<operand_entry *> ops;
4745 auto_vec<inter_bb_range_test_entry> bbinfo;
4746 bool any_changes = false;
4747 bool cfg_cleanup_needed = false;
4749 /* Consider only basic blocks that end with GIMPLE_COND or
4750 a cast statement satisfying final_range_test_p. All
4751 but the last bb in the first_bb .. last_bb range
4752 should end with GIMPLE_COND. */
4753 if (gimple_code (stmt) == GIMPLE_COND)
4755 if (EDGE_COUNT (first_bb->succs) != 2)
4756 return cfg_cleanup_needed;
4758 else if (final_range_test_p (stmt))
4759 other_bb = single_succ (first_bb);
4760 else
4761 return cfg_cleanup_needed;
4763 if (stmt_could_throw_p (cfun, stmt))
4764 return cfg_cleanup_needed;
4766 /* As relative ordering of post-dominator sons isn't fixed,
4767 maybe_optimize_range_tests can be called first on any
4768 bb in the range we want to optimize. So, start searching
4769 backwards, to see if first_bb can be set to a predecessor. */
4770 while (single_pred_p (first_bb))
4772 basic_block pred_bb = single_pred (first_bb);
4773 if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, NULL, true))
4774 break;
4775 if (!no_side_effect_bb (first_bb))
4776 break;
4777 first_bb = pred_bb;
4779 /* If first_bb is last_bb, other_bb hasn't been computed yet.
4780 Before starting forward search in last_bb successors, find
4781 out the other_bb. */
4782 if (first_bb == last_bb)
4784 other_bb = NULL;
4785 /* As non-GIMPLE_COND last stmt always terminates the range,
4786 if forward search didn't discover anything, just give up. */
4787 if (gimple_code (stmt) != GIMPLE_COND)
4788 return cfg_cleanup_needed;
4789 /* Look at both successors. Either it ends with a GIMPLE_COND
4790 and satisfies suitable_cond_bb, or ends with a cast and
4791 other_bb is that cast's successor. */
4792 FOR_EACH_EDGE (e, ei, first_bb->succs)
4793 if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))
4794 || e->dest == first_bb)
4795 return cfg_cleanup_needed;
4796 else if (single_pred_p (e->dest))
4798 stmt = last_nondebug_stmt (e->dest);
4799 if (stmt
4800 && gimple_code (stmt) == GIMPLE_COND
4801 && EDGE_COUNT (e->dest->succs) == 2)
4803 if (suitable_cond_bb (first_bb, e->dest, &other_bb,
4804 NULL, true))
4805 break;
4806 else
4807 other_bb = NULL;
4809 else if (stmt
4810 && final_range_test_p (stmt)
4811 && find_edge (first_bb, single_succ (e->dest)))
4813 other_bb = single_succ (e->dest);
4814 if (other_bb == first_bb)
4815 other_bb = NULL;
4818 if (other_bb == NULL)
4819 return cfg_cleanup_needed;
4821 /* Now do the forward search, moving last_bb to successor bbs
4822 that aren't other_bb. */
4823 while (EDGE_COUNT (last_bb->succs) == 2)
4825 FOR_EACH_EDGE (e, ei, last_bb->succs)
4826 if (e->dest != other_bb)
4827 break;
4828 if (e == NULL)
4829 break;
4830 if (!single_pred_p (e->dest))
4831 break;
4832 if (!suitable_cond_bb (e->dest, last_bb, &other_bb, NULL, false))
4833 break;
4834 if (!no_side_effect_bb (e->dest))
4835 break;
4836 last_bb = e->dest;
4838 if (first_bb == last_bb)
4839 return cfg_cleanup_needed;
4840 /* Here basic blocks first_bb through last_bb's predecessor
4841 end with GIMPLE_COND, all of them have one of the edges to
4842 other_bb and another to another block in the range,
4843 all blocks except first_bb don't have side-effects and
4844 last_bb ends with either GIMPLE_COND, or cast satisfying
4845 final_range_test_p. */
4846 for (bb = last_bb; ; bb = single_pred (bb))
4848 enum tree_code code;
4849 tree lhs, rhs;
4850 inter_bb_range_test_entry bb_ent;
4852 bb_ent.op = NULL_TREE;
4853 bb_ent.first_idx = ops.length ();
4854 bb_ent.last_idx = bb_ent.first_idx;
4855 e = find_edge (bb, other_bb);
4856 stmt = last_nondebug_stmt (bb);
4857 gimple_set_visited (stmt, true);
4858 if (gimple_code (stmt) != GIMPLE_COND)
4860 use_operand_p use_p;
4861 gimple *phi;
4862 edge e2;
4863 unsigned int d;
4865 lhs = gimple_assign_lhs (stmt);
4866 rhs = gimple_assign_rhs1 (stmt);
4867 gcc_assert (bb == last_bb);
4869 /* stmt is
4870 _123 = (int) _234;
4872 _234 = a_2(D) == 2;
4874 followed by:
4875 <bb M>:
4876 # _345 = PHI <_123(N), 1(...), 1(...)>
4878 or 0 instead of 1. If it is 0, the _234
4879 range test is anded together with all the
4880 other range tests, if it is 1, it is ored with
4881 them. */
4882 single_imm_use (lhs, &use_p, &phi);
4883 gcc_assert (gimple_code (phi) == GIMPLE_PHI);
4884 e2 = find_edge (first_bb, other_bb);
4885 d = e2->dest_idx;
4886 gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs);
4887 if (integer_zerop (gimple_phi_arg_def (phi, d)))
4888 code = BIT_AND_EXPR;
4889 else
4891 gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d)));
4892 code = BIT_IOR_EXPR;
4895 /* If _234 SSA_NAME_DEF_STMT is
4896 _234 = _567 | _789;
4897 (or &, corresponding to 1/0 in the phi arguments),
4898 push into ops the individual range test arguments
4899 of the bitwise or resp. and, recursively. */
4900 if (TREE_CODE (rhs) == SSA_NAME
4901 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4902 != tcc_comparison)
4903 && !get_ops (rhs, code, &ops,
4904 loop_containing_stmt (stmt))
4905 && has_single_use (rhs))
4907 /* Otherwise, push the _234 range test itself. */
4908 operand_entry *oe = operand_entry_pool.allocate ();
4910 oe->op = rhs;
4911 oe->rank = code;
4912 oe->id = 0;
4913 oe->count = 1;
4914 oe->stmt_to_insert = NULL;
4915 ops.safe_push (oe);
4916 bb_ent.last_idx++;
4917 bb_ent.op = rhs;
4919 else if (is_gimple_assign (stmt)
4920 && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
4921 == tcc_comparison)
4922 && !get_ops (lhs, code, &ops,
4923 loop_containing_stmt (stmt))
4924 && has_single_use (lhs))
4926 operand_entry *oe = operand_entry_pool.allocate ();
4927 oe->op = lhs;
4928 oe->rank = code;
4929 oe->id = 0;
4930 oe->count = 1;
4931 ops.safe_push (oe);
4932 bb_ent.last_idx++;
4933 bb_ent.op = lhs;
4935 else
4937 bb_ent.last_idx = ops.length ();
4938 bb_ent.op = rhs;
4940 bbinfo.safe_push (bb_ent);
4941 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
4942 ops[i]->id = bb->index;
4943 continue;
4945 else if (bb == last_bb)
4947 /* For last_bb, handle also:
4948 if (x_3(D) == 3)
4949 goto <bb 6>; [34.00%]
4950 else
4951 goto <bb 7>; [66.00%]
4953 <bb 6> [local count: 79512730]:
4955 <bb 7> [local count: 1073741824]:
4956 # prephitmp_7 = PHI <1(3), 1(4), 0(5), 1(2), 1(6)>
4957 where bb 7 is OTHER_BB, but the PHI values from the
4958 earlier bbs match the path through the empty bb
4959 in between. */
4960 bool test_swapped_p = false;
4961 bool ok = suitable_cond_bb (single_pred (last_bb), last_bb,
4962 &other_bb, &test_swapped_p, true);
4963 gcc_assert (ok);
4964 if (test_swapped_p)
4965 e = EDGE_SUCC (bb, e == EDGE_SUCC (bb, 0) ? 1 : 0);
4967 /* Otherwise stmt is GIMPLE_COND. */
4968 code = gimple_cond_code (stmt);
4969 lhs = gimple_cond_lhs (stmt);
4970 rhs = gimple_cond_rhs (stmt);
4971 if (TREE_CODE (lhs) == SSA_NAME
4972 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4973 && ((code != EQ_EXPR && code != NE_EXPR)
4974 || rhs != boolean_false_node
4975 /* Either push into ops the individual bitwise
4976 or resp. and operands, depending on which
4977 edge is other_bb. */
4978 || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0)
4979 ^ (code == EQ_EXPR))
4980 ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops,
4981 loop_containing_stmt (stmt))))
4983 /* Or push the GIMPLE_COND stmt itself. */
4984 operand_entry *oe = operand_entry_pool.allocate ();
4986 oe->op = NULL;
4987 oe->rank = (e->flags & EDGE_TRUE_VALUE)
4988 ? BIT_IOR_EXPR : BIT_AND_EXPR;
4989 /* oe->op = NULL signals that there is no SSA_NAME
4990 for the range test, and oe->id instead is the
4991 basic block number at whose end the GIMPLE_COND
4992 is. */
4993 oe->id = bb->index;
4994 oe->count = 1;
4995 oe->stmt_to_insert = NULL;
4996 ops.safe_push (oe);
4997 bb_ent.op = NULL;
4998 bb_ent.last_idx++;
5000 else if (ops.length () > bb_ent.first_idx)
5002 bb_ent.op = lhs;
5003 bb_ent.last_idx = ops.length ();
5005 bbinfo.safe_push (bb_ent);
5006 for (unsigned int i = bb_ent.first_idx; i < bb_ent.last_idx; ++i)
5007 ops[i]->id = bb->index;
5008 if (bb == first_bb)
5009 break;
5011 if (ops.length () > 1)
5012 any_changes = optimize_range_tests (ERROR_MARK, &ops, first_bb);
5013 if (any_changes)
5015 unsigned int idx, max_idx = 0;
5016 /* update_ops relies on has_single_use predicates returning the
5017 same values as they did during get_ops earlier. Additionally it
5018 never removes statements, only adds new ones and it should walk
5019 from the single imm use and check the predicate already before
5020 making those changes.
5021 On the other hand, the handling of GIMPLE_COND directly can turn
5022 previously multiply used SSA_NAMEs into single use SSA_NAMEs, so
5023 it needs to be done in a separate loop afterwards. */
5024 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5026 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5027 && bbinfo[idx].op != NULL_TREE)
5029 tree new_op;
5031 max_idx = idx;
5032 stmt = last_nondebug_stmt (bb);
5033 new_op = update_ops (bbinfo[idx].op,
5034 (enum tree_code)
5035 ops[bbinfo[idx].first_idx]->rank,
5036 ops, &bbinfo[idx].first_idx,
5037 loop_containing_stmt (stmt));
5038 if (new_op == NULL_TREE)
5040 gcc_assert (bb == last_bb);
5041 new_op = ops[bbinfo[idx].first_idx++]->op;
5043 if (bbinfo[idx].op != new_op)
5045 imm_use_iterator iter;
5046 use_operand_p use_p;
5047 gimple *use_stmt, *cast_or_tcc_cmp_stmt = NULL;
5049 FOR_EACH_IMM_USE_STMT (use_stmt, iter, bbinfo[idx].op)
5050 if (is_gimple_debug (use_stmt))
5051 continue;
5052 else if (gimple_code (use_stmt) == GIMPLE_COND
5053 || gimple_code (use_stmt) == GIMPLE_PHI)
5054 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5055 SET_USE (use_p, new_op);
5056 else if ((is_gimple_assign (use_stmt)
5057 && (TREE_CODE_CLASS
5058 (gimple_assign_rhs_code (use_stmt))
5059 == tcc_comparison)))
5060 cast_or_tcc_cmp_stmt = use_stmt;
5061 else if (gimple_assign_cast_p (use_stmt))
5062 cast_or_tcc_cmp_stmt = use_stmt;
5063 else
5064 gcc_unreachable ();
5066 if (cast_or_tcc_cmp_stmt)
5068 gcc_assert (bb == last_bb);
5069 tree lhs = gimple_assign_lhs (cast_or_tcc_cmp_stmt);
5070 tree new_lhs = make_ssa_name (TREE_TYPE (lhs));
5071 enum tree_code rhs_code
5072 = gimple_assign_cast_p (cast_or_tcc_cmp_stmt)
5073 ? gimple_assign_rhs_code (cast_or_tcc_cmp_stmt)
5074 : CONVERT_EXPR;
5075 gassign *g;
5076 if (is_gimple_min_invariant (new_op))
5078 new_op = fold_convert (TREE_TYPE (lhs), new_op);
5079 g = gimple_build_assign (new_lhs, new_op);
5081 else
5082 g = gimple_build_assign (new_lhs, rhs_code, new_op);
5083 gimple_stmt_iterator gsi
5084 = gsi_for_stmt (cast_or_tcc_cmp_stmt);
5085 gimple_set_uid (g, gimple_uid (cast_or_tcc_cmp_stmt));
5086 gimple_set_visited (g, true);
5087 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5088 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs)
5089 if (is_gimple_debug (use_stmt))
5090 continue;
5091 else if (gimple_code (use_stmt) == GIMPLE_COND
5092 || gimple_code (use_stmt) == GIMPLE_PHI)
5093 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
5094 SET_USE (use_p, new_lhs);
5095 else
5096 gcc_unreachable ();
5100 if (bb == first_bb)
5101 break;
5103 for (bb = last_bb, idx = 0; ; bb = single_pred (bb), idx++)
5105 if (bbinfo[idx].first_idx < bbinfo[idx].last_idx
5106 && bbinfo[idx].op == NULL_TREE
5107 && ops[bbinfo[idx].first_idx]->op != NULL_TREE)
5109 gcond *cond_stmt = as_a <gcond *> (*gsi_last_bb (bb));
5111 if (idx > max_idx)
5112 max_idx = idx;
5114 /* If we collapse the conditional to a true/false
5115 condition, then bubble that knowledge up to our caller. */
5116 if (integer_zerop (ops[bbinfo[idx].first_idx]->op))
5118 gimple_cond_make_false (cond_stmt);
5119 cfg_cleanup_needed = true;
5121 else if (integer_onep (ops[bbinfo[idx].first_idx]->op))
5123 gimple_cond_make_true (cond_stmt);
5124 cfg_cleanup_needed = true;
5126 else
5128 gimple_cond_set_code (cond_stmt, NE_EXPR);
5129 gimple_cond_set_lhs (cond_stmt,
5130 ops[bbinfo[idx].first_idx]->op);
5131 gimple_cond_set_rhs (cond_stmt, boolean_false_node);
5133 update_stmt (cond_stmt);
5135 if (bb == first_bb)
5136 break;
5139 /* The above changes could result in basic blocks after the first
5140 modified one, up to and including last_bb, being executed even if
5141 they would not be in the original program. If the value ranges of
5142 assignment lhs' in those bbs were dependent on the conditions
5143 guarding those basic blocks which now can change, the VRs might
5144 be incorrect. As no_side_effect_bb should ensure those SSA_NAMEs
5145 are only used within the same bb, it should not be a big deal if
5146 we just reset all the VRs in those bbs. See PR68671. */
5147 for (bb = last_bb, idx = 0; idx < max_idx; bb = single_pred (bb), idx++)
5148 reset_flow_sensitive_info_in_bb (bb);
5150 return cfg_cleanup_needed;
5153 /* Remove def stmt of VAR if VAR has zero uses and recurse
5154 on rhs1 operand if so. */
5156 static void
5157 remove_visited_stmt_chain (tree var)
5159 gimple *stmt;
5160 gimple_stmt_iterator gsi;
5162 while (1)
5164 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
5165 return;
5166 stmt = SSA_NAME_DEF_STMT (var);
5167 if (is_gimple_assign (stmt) && gimple_visited_p (stmt))
5169 var = gimple_assign_rhs1 (stmt);
5170 gsi = gsi_for_stmt (stmt);
5171 reassoc_remove_stmt (&gsi);
5172 release_defs (stmt);
5174 else
5175 return;
5179 /* This function checks three consecutive operands in
5180 the passed operands vector OPS starting from OPINDEX and
5181 swaps two operands if it is profitable for the binary operation
5182 consuming the OPINDEX + 1 and OPINDEX + 2 operands.
5184 We pair ops with the same rank if possible. */
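/* Illustrative sketch (made-up ranks): with OPS[OPINDEX..OPINDEX+2]
   having ranks {3, 3, 1}, the first and third entries are swapped so
   that the binary statement consuming the last two operands pairs the
   two rank-3 entries, which gives the redundancy eliminator a better
   chance later on.  */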
5186 static void
5187 swap_ops_for_binary_stmt (const vec<operand_entry *> &ops,
5188 unsigned int opindex)
5190 operand_entry *oe1, *oe2, *oe3;
5192 oe1 = ops[opindex];
5193 oe2 = ops[opindex + 1];
5194 oe3 = ops[opindex + 2];
5196 if (oe1->rank == oe2->rank && oe2->rank != oe3->rank)
5197 std::swap (*oe1, *oe3);
5198 else if (oe1->rank == oe3->rank && oe2->rank != oe3->rank)
5199 std::swap (*oe1, *oe2);
5202 /* If definition of RHS1 or RHS2 dominates STMT, return the later of those
5203 two definitions, otherwise return STMT. Sets INSERT_BEFORE to indicate
5204 whether RHS1 op RHS2 can be inserted before or needs to be inserted
5205 after the returned stmt. */
5207 static inline gimple *
5208 find_insert_point (gimple *stmt, tree rhs1, tree rhs2, bool &insert_before)
5210 insert_before = true;
5211 if (TREE_CODE (rhs1) == SSA_NAME
5212 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs1)))
5214 stmt = SSA_NAME_DEF_STMT (rhs1);
5215 insert_before = false;
5217 if (TREE_CODE (rhs2) == SSA_NAME
5218 && reassoc_stmt_dominates_stmt_p (stmt, SSA_NAME_DEF_STMT (rhs2)))
5220 stmt = SSA_NAME_DEF_STMT (rhs2);
5221 insert_before = false;
5223 return stmt;
5226 /* If the stmt that defines operand has to be inserted, insert it
5227 before the use. */
5228 static void
5229 insert_stmt_before_use (gimple *stmt, gimple *stmt_to_insert)
5231 gcc_assert (is_gimple_assign (stmt_to_insert));
5232 tree rhs1 = gimple_assign_rhs1 (stmt_to_insert);
5233 tree rhs2 = gimple_assign_rhs2 (stmt_to_insert);
5234 bool insert_before;
5235 gimple *insert_point = find_insert_point (stmt, rhs1, rhs2, insert_before);
5236 gimple_stmt_iterator gsi = gsi_for_stmt (insert_point);
5237 gimple_set_uid (stmt_to_insert, gimple_uid (insert_point));
5239 /* If the insert point is not stmt, then insert_point would be
5240 the point where operand rhs1 or rhs2 is defined. In this case,
5241 stmt_to_insert has to be inserted afterwards. This would
5242 only happen when the stmt insertion point is flexible. */
5243 if (insert_before)
5244 gsi_insert_before (&gsi, stmt_to_insert, GSI_NEW_STMT);
5245 else
5246 insert_stmt_after (stmt_to_insert, insert_point);
5250 /* Recursively rewrite our linearized statements so that the operators
5251 match those in OPS[OPINDEX], putting the computation in rank
5252 order. Return new lhs.
5253 CHANGED is true if we shouldn't reuse the lhs SSA_NAME both in
5254 the current stmt and during recursive invocations.
5255 NEXT_CHANGED is true if we shouldn't reuse the lhs SSA_NAME in
5256 recursive invocations. */
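/* A purely illustrative trace (made-up names): for the left-linearized
   chain

     t1 = a + b;
     t2 = t1 + c;
     t3 = t2 + d;

   and OPS = { d, c, b, a }, the call on T3 with OPINDEX 0 sets T3's
   rhs2 to d, recurses into T2 (OPINDEX 1) setting its rhs2 to c, and
   finally rewrites T1 from the remaining two entries as b + a, so the
   whole chain computes ((b + a) + c) + d.  */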
5258 static tree
5259 rewrite_expr_tree (gimple *stmt, enum tree_code rhs_code, unsigned int opindex,
5260 const vec<operand_entry *> &ops, bool changed,
5261 bool next_changed)
5263 tree rhs1 = gimple_assign_rhs1 (stmt);
5264 tree rhs2 = gimple_assign_rhs2 (stmt);
5265 tree lhs = gimple_assign_lhs (stmt);
5266 operand_entry *oe;
5268 /* The final recursion case for this function is that you have
5269 exactly two operations left.
5270 If we had exactly one op in the entire list to start with, we
5271 would have never called this function, and the tail recursion
5272 rewrites them one at a time. */
5273 if (opindex + 2 == ops.length ())
5275 operand_entry *oe1, *oe2;
5277 oe1 = ops[opindex];
5278 oe2 = ops[opindex + 1];
5280 if (rhs1 != oe1->op || rhs2 != oe2->op)
5282 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5283 unsigned int uid = gimple_uid (stmt);
5285 if (dump_file && (dump_flags & TDF_DETAILS))
5287 fprintf (dump_file, "Transforming ");
5288 print_gimple_stmt (dump_file, stmt, 0);
5291 /* If the stmt that defines operand has to be inserted, insert it
5292 before the use. */
5293 if (oe1->stmt_to_insert)
5294 insert_stmt_before_use (stmt, oe1->stmt_to_insert);
5295 if (oe2->stmt_to_insert)
5296 insert_stmt_before_use (stmt, oe2->stmt_to_insert);
5297 /* Even when changed is false, reassociation could have e.g. removed
5298 some redundant operations, so unless we are just swapping the
5299 arguments or unless there is no change at all (then we just
5300 return lhs), force creation of a new SSA_NAME. */
5301 if (changed || ((rhs1 != oe2->op || rhs2 != oe1->op) && opindex))
5303 bool insert_before;
5304 gimple *insert_point
5305 = find_insert_point (stmt, oe1->op, oe2->op, insert_before);
5306 lhs = make_ssa_name (TREE_TYPE (lhs));
5307 stmt
5308 = gimple_build_assign (lhs, rhs_code,
5309 oe1->op, oe2->op);
5310 gimple_set_uid (stmt, uid);
5311 gimple_set_visited (stmt, true);
5312 if (insert_before)
5313 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5314 else
5315 insert_stmt_after (stmt, insert_point);
5317 else
5319 bool insert_before;
5320 gcc_checking_assert (find_insert_point (stmt, oe1->op, oe2->op,
5321 insert_before)
5322 == stmt);
5323 gimple_assign_set_rhs1 (stmt, oe1->op);
5324 gimple_assign_set_rhs2 (stmt, oe2->op);
5325 update_stmt (stmt);
5328 if (rhs1 != oe1->op && rhs1 != oe2->op)
5329 remove_visited_stmt_chain (rhs1);
5331 if (dump_file && (dump_flags & TDF_DETAILS))
5333 fprintf (dump_file, " into ");
5334 print_gimple_stmt (dump_file, stmt, 0);
5337 return lhs;
5340 /* If we hit here, we should have 3 or more ops left. */
5341 gcc_assert (opindex + 2 < ops.length ());
5343 /* Rewrite the next operator. */
5344 oe = ops[opindex];
5346 /* If the stmt that defines operand has to be inserted, insert it
5347 before the use. */
5348 if (oe->stmt_to_insert)
5349 insert_stmt_before_use (stmt, oe->stmt_to_insert);
5351 /* Recurse on the LHS of the binary operator, which is guaranteed to
5352 be the non-leaf side. */
5353 tree new_rhs1
5354 = rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), rhs_code, opindex + 1, ops,
5355 changed || oe->op != rhs2 || next_changed,
5356 false);
5358 if (oe->op != rhs2 || new_rhs1 != rhs1)
5360 if (dump_file && (dump_flags & TDF_DETAILS))
5362 fprintf (dump_file, "Transforming ");
5363 print_gimple_stmt (dump_file, stmt, 0);
5366 /* If changed is false, this is either opindex == 0
5367 or all outer rhs2's were equal to corresponding oe->op,
5368 and powi_result is NULL.
5369 That means lhs is equivalent before and after reassociation.
5370 Otherwise ensure the old lhs SSA_NAME is not reused and
5371 create a new stmt as well, so that any debug stmts will be
5372 properly adjusted. */
5373 if (changed)
5375 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5376 unsigned int uid = gimple_uid (stmt);
5377 bool insert_before;
5378 gimple *insert_point = find_insert_point (stmt, new_rhs1, oe->op,
5379 insert_before);
5381 lhs = make_ssa_name (TREE_TYPE (lhs));
5382 stmt = gimple_build_assign (lhs, rhs_code,
5383 new_rhs1, oe->op);
5384 gimple_set_uid (stmt, uid);
5385 gimple_set_visited (stmt, true);
5386 if (insert_before)
5387 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5388 else
5389 insert_stmt_after (stmt, insert_point);
5391 else
5393 bool insert_before;
5394 gcc_checking_assert (find_insert_point (stmt, new_rhs1, oe->op,
5395 insert_before)
5396 == stmt);
5397 gimple_assign_set_rhs1 (stmt, new_rhs1);
5398 gimple_assign_set_rhs2 (stmt, oe->op);
5399 update_stmt (stmt);
5402 if (dump_file && (dump_flags & TDF_DETAILS))
5404 fprintf (dump_file, " into ");
5405 print_gimple_stmt (dump_file, stmt, 0);
5408 return lhs;
5411 /* Find out how many cycles we need to compute the statements chain.
5412 OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the
5413 maximum number of independent statements we may execute per cycle. */
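/* Worked example (numbers made up): with OPS_NUM = 10 and
   CPU_WIDTH = 2, the first phase takes 10 / (2 * 2) = 2 cycles,
   leaving 10 - 2 * 2 = 6 operands; 6 is not a power of two, so the
   second phase needs floor_log2 (6) + 1 = 3 more cycles, for a total
   of 5.  */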
5415 static int
5416 get_required_cycles (int ops_num, int cpu_width)
5418 int res;
5419 int elog;
5420 unsigned int rest;
5422 /* While we have more than 2 * cpu_width operands
5423 we may reduce number of operands by cpu_width
5424 per cycle. */
5425 res = ops_num / (2 * cpu_width);
5427 /* The remaining operand count may be halved each cycle
5428 until we have only one operand. */
5429 rest = (unsigned)(ops_num - res * cpu_width);
5430 elog = exact_log2 (rest);
5431 if (elog >= 0)
5432 res += elog;
5433 else
5434 res += floor_log2 (rest) + 1;
5436 return res;
5439 /* Given that the target fully pipelines FMA instructions, return the latency
5440 of MULT_EXPRs that can't be hidden by the FMAs. WIDTH is the number of
5441 pipes. */
5443 static inline int
5444 get_mult_latency_consider_fma (int ops_num, int mult_num, int width)
5446 gcc_checking_assert (mult_num && mult_num <= ops_num);
5448 /* For each partition, if mult_num == ops_num, there's latency(MULT)*2.
5449 e.g:
5451 A * B + C * D
5453 _1 = A * B;
5454 _2 = .FMA (C, D, _1);
5456 Otherwise there's latency(MULT)*1 in the first FMA. */
5457 return CEIL (ops_num, width) == CEIL (mult_num, width) ? 2 : 1;
5460 /* Returns an optimal number of registers to use for computation of
5461 the given statements.
5463 LHS is the result ssa name of OPS. MULT_NUM is the number of sub-expressions
5464 that are MULT_EXPRs, when OPS are PLUS_EXPRs or MINUS_EXPRs. */
5466 static int
5467 get_reassociation_width (vec<operand_entry *> *ops, int mult_num, tree lhs,
5468 enum tree_code opc, machine_mode mode)
5470 int param_width = param_tree_reassoc_width;
5471 int width;
5472 int width_min;
5473 int cycles_best;
5474 int ops_num = ops->length ();
5476 if (param_width > 0)
5477 width = param_width;
5478 else
5479 width = targetm.sched.reassociation_width (opc, mode);
5481 if (width == 1)
5482 return width;
5484 /* Get the minimal time required for sequence computation. */
5485 cycles_best = get_required_cycles (ops_num, width);
5487 /* Check if we may use less width and still compute the sequence in
5488 the same time. It will allow us to reduce register usage.
5489 get_required_cycles is monotonically increasing with lower width
5490 so we can perform a binary search for the minimal width that still
5491 results in the optimal cycle count. */
5492 width_min = 1;
5494 /* If the target fully pipelines FMA instructions, the multiply part can start
5495 as soon as its operands are ready. Assuming symmetric pipes are used for
5496 FMUL/FADD/FMA, then for a sequence of FMA like:
5498 _8 = .FMA (_2, _3, _1);
5499 _9 = .FMA (_5, _4, _8);
5500 _10 = .FMA (_7, _6, _9);
5502 , if width=1, the latency is latency(MULT) + latency(ADD)*3.
5503 While with width=2:
5505 _8 = _4 * _5;
5506 _9 = .FMA (_2, _3, _1);
5507 _10 = .FMA (_6, _7, _8);
5508 _11 = _9 + _10;
5510 , it is latency(MULT)*2 + latency(ADD)*2. Assuming latency(MULT) >=
5511 latency(ADD), the first variant is preferred.
5513 Find out if we can get a smaller width considering FMA.
5514 Assume FMUL and FMA use the same units that can also do FADD.
5515 For other scenarios, such as when FMUL and FADD are using separated units,
5516 the following code may not apply. */
5518 int width_mult = targetm.sched.reassociation_width (MULT_EXPR, mode);
5519 if (width > 1 && mult_num && param_fully_pipelined_fma
5520 && width_mult <= width)
5522 /* Latency of MULT_EXPRs. */
5523 int lat_mul
5524 = get_mult_latency_consider_fma (ops_num, mult_num, width_mult);
5526 /* Quick search might not apply. So start from 1. */
5527 for (int i = 1; i < width_mult; i++)
5529 int lat_mul_new
5530 = get_mult_latency_consider_fma (ops_num, mult_num, i);
5531 int lat_add_new = get_required_cycles (ops_num, i);
5533 /* Assume latency(MULT) >= latency(ADD). */
5534 if (lat_mul - lat_mul_new >= lat_add_new - cycles_best)
5536 width = i;
5537 break;
5541 else
5543 while (width > width_min)
5545 int width_mid = (width + width_min) / 2;
5547 if (get_required_cycles (ops_num, width_mid) == cycles_best)
5548 width = width_mid;
5549 else if (width_min < width_mid)
5550 width_min = width_mid;
5551 else
5552 break;
5556 /* If there's a loop-dependent FMA result, return width=2 to avoid it. This is
5557 better than skipping these FMA candidates in widening_mul. */
5558 if (width == 1
5559 && maybe_le (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (lhs))),
5560 param_avoid_fma_max_bits))
5562 /* Look for cross backedge dependency:
5563 1. LHS is a phi argument in the same basic block it is defined.
5564 2. And the result of the phi node is used in OPS. */
5565 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (lhs));
5567 use_operand_p use_p;
5568 imm_use_iterator iter;
5569 FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
5570 if (gphi *phi = dyn_cast<gphi *> (USE_STMT (use_p)))
5572 if (gimple_phi_arg_edge (phi, phi_arg_index_from_use (use_p))->src
5573 != bb)
5574 continue;
5575 tree phi_result = gimple_phi_result (phi);
5576 operand_entry *oe;
5577 unsigned int j;
5578 FOR_EACH_VEC_ELT (*ops, j, oe)
5580 if (TREE_CODE (oe->op) != SSA_NAME)
5581 continue;
5583 /* Result of phi is operand of PLUS_EXPR. */
5584 if (oe->op == phi_result)
5585 return 2;
5587 /* Check if the result of the phi is an operand of a MULT_EXPR. */
5588 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
5589 if (is_gimple_assign (def_stmt)
5590 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR)
5592 tree rhs = gimple_assign_rhs1 (def_stmt);
5593 if (TREE_CODE (rhs) == SSA_NAME)
5595 if (rhs == phi_result)
5596 return 2;
5597 def_stmt = SSA_NAME_DEF_STMT (rhs);
5600 if (is_gimple_assign (def_stmt)
5601 && gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
5603 if (gimple_assign_rhs1 (def_stmt) == phi_result
5604 || gimple_assign_rhs2 (def_stmt) == phi_result)
5605 return 2;
5611 return width;
5614 #define SPECIAL_BIASED_END_STMT 0 /* It is the end stmt of all ops. */
5615 #define BIASED_END_STMT 1 /* It is the end stmt of normal or biased ops. */
5616 #define NORMAL_END_STMT 2 /* It is the end stmt of normal ops. */
5618 /* Rewrite statements with a dependency chain with regard to the chance to
5619 generate FMA.
5620 For a chain with FMA: try to keep the FMA opportunity as much as possible.
5621 For a chain without FMA: put the computation in rank order and try
5622 to allow operations to be executed in parallel.
5623 E.g.
5624 e + f + a * b + c * d;
5626 ssa1 = e + a * b;
5627 ssa2 = f + c * d;
5628 ssa3 = ssa1 + ssa2;
5630 This reassociation approach preserves the chance of fma generation as much
5631 as possible.
5633 Another thing is to avoid adding loop-carried ops to long chains, otherwise
5634 the whole chain will have dependencies across the loop iteration. Just keep
5635 loop-carried ops in a separate chain.
5636 E.g.
5637 x_1 = phi (x_0, x_2)
5638 y_1 = phi (y_0, y_2)
5640 a + b + c + d + e + x1 + y1
5642 SSA1 = a + b;
5643 SSA2 = c + d;
5644 SSA3 = SSA1 + e;
5645 SSA4 = SSA3 + SSA2;
5646 SSA5 = x1 + y1;
5647 SSA6 = SSA4 + SSA5;
5649 static void
5650 rewrite_expr_tree_parallel (gassign *stmt, int width, bool has_fma,
5651 const vec<operand_entry *> &ops)
5653 enum tree_code opcode = gimple_assign_rhs_code (stmt);
5654 int op_num = ops.length ();
5655 int op_normal_num = op_num;
5656 gcc_assert (op_num > 0);
5657 int stmt_num = op_num - 1;
5658 gimple **stmts = XALLOCAVEC (gimple *, stmt_num);
5659 int i = 0, j = 0;
5660 tree tmp_op[2], op1;
5661 operand_entry *oe;
5662 gimple *stmt1 = NULL;
5663 tree last_rhs1 = gimple_assign_rhs1 (stmt);
5664 int last_rhs1_stmt_index = 0, last_rhs2_stmt_index = 0;
5665 int width_active = 0, width_count = 0;
5666 bool has_biased = false, ops_changed = false;
5667 auto_vec<operand_entry *> ops_normal;
5668 auto_vec<operand_entry *> ops_biased;
5669 vec<operand_entry *> *ops1;
5671 /* We start expression rewriting from the top statements.
5672 So, in this loop we create a full list of statements
5673 we will work with. */
5674 stmts[stmt_num - 1] = stmt;
5675 for (i = stmt_num - 2; i >= 0; i--)
5676 stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1]));
5678 /* Avoid adding loop-carried ops to long chains; first filter out the
5679 loop-carried ones. But we need to make sure that the length of the
5680 remainder is not less than 4, which is the smallest ops length at which
5681 we can break the dependency. */
5682 FOR_EACH_VEC_ELT (ops, i, oe)
5684 if (TREE_CODE (oe->op) == SSA_NAME
5685 && bitmap_bit_p (biased_names, SSA_NAME_VERSION (oe->op))
5686 && op_normal_num > 4)
5688 ops_biased.safe_push (oe);
5689 has_biased = true;
5690 op_normal_num --;
5692 else
5693 ops_normal.safe_push (oe);
5696 /* Width should not be larger than ops length / 2, since we cannot create
5697 more parallel dependency chains than that. */
5698 int width_normal = op_normal_num / 2;
5699 int width_biased = (op_num - op_normal_num) / 2;
5700 width_normal = width <= width_normal ? width : width_normal;
5701 width_biased = width <= width_biased ? width : width_biased;
5703 ops1 = &ops_normal;
5704 width_count = width_active = width_normal;
5706 /* Build parallel dependency chain according to width. */
5707 for (i = 0; i < stmt_num; i++)
5709 if (dump_file && (dump_flags & TDF_DETAILS))
5711 fprintf (dump_file, "Transforming ");
5712 print_gimple_stmt (dump_file, stmts[i], 0);
5715 /* When the work of normal ops is over, but the loop is not over,
5716 continue to do biased ops. */
5717 if (width_count == 0 && ops1 == &ops_normal)
5719 ops1 = &ops_biased;
5720 width_count = width_active = width_biased;
5721 ops_changed = true;
5724 /* Swap the operands if no FMA in the chain. */
5725 if (ops1->length () > 2 && !has_fma)
5726 swap_ops_for_binary_stmt (*ops1, ops1->length () - 3);
5728 if (i < width_active
5729 || (ops_changed && i <= (last_rhs1_stmt_index + width_active)))
5731 for (j = 0; j < 2; j++)
5733 oe = ops1->pop ();
5734 tmp_op[j] = oe->op;
5735 /* If the stmt that defines operand has to be inserted, insert it
5736 before the use. */
5737 stmt1 = oe->stmt_to_insert;
5738 if (stmt1)
5739 insert_stmt_before_use (stmts[i], stmt1);
5740 stmt1 = NULL;
5742 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5743 tmp_op[1],
5744 tmp_op[0],
5745 opcode);
5746 gimple_set_visited (stmts[i], true);
5749 else
5751 /* We keep original statement only for the last one. All others are
5752 recreated. */
5753 if (!ops1->length ())
5755 /* For biased length equal to 2. */
5756 if (width_count == BIASED_END_STMT && !last_rhs2_stmt_index)
5757 last_rhs2_stmt_index = i - 1;
5759 /* When width_count == 2 and there is no biased, just finish. */
5760 if (width_count == NORMAL_END_STMT && !has_biased)
5762 last_rhs1_stmt_index = i - 1;
5763 last_rhs2_stmt_index = i - 2;
5765 if (last_rhs1_stmt_index && (last_rhs2_stmt_index || !has_biased))
5767 /* We keep original statement only for the last one. All
5768 others are recreated. */
5769 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5770 (stmts[last_rhs1_stmt_index]));
5771 gimple_assign_set_rhs2 (stmts[i], gimple_assign_lhs
5772 (stmts[last_rhs2_stmt_index]));
5773 update_stmt (stmts[i]);
5775 else
5777 stmts[i] =
5778 build_and_add_sum (TREE_TYPE (last_rhs1),
5779 gimple_assign_lhs (stmts[i-width_count]),
5780 gimple_assign_lhs
5781 (stmts[i-width_count+1]),
5782 opcode);
5783 gimple_set_visited (stmts[i], true);
5784 width_count--;
5786 /* It is the end of the normal or biased ops.
5787 last_rhs1_stmt_index is used to record the last stmt index
5788 for normal ops. last_rhs2_stmt_index is used to record the
5789 last stmt index for biased ops. */
5790 if (width_count == BIASED_END_STMT)
5792 gcc_assert (has_biased);
5793 if (ops_biased.length ())
5794 last_rhs1_stmt_index = i;
5795 else
5796 last_rhs2_stmt_index = i;
5797 width_count--;
5801 else
5803 /* Attach the remaining ops to the parallel dependency chain. */
5804 oe = ops1->pop ();
5805 op1 = oe->op;
5806 stmt1 = oe->stmt_to_insert;
5807 if (stmt1)
5808 insert_stmt_before_use (stmts[i], stmt1);
5809 stmt1 = NULL;
5811 /* For only one biased op. */
5812 if (width_count == SPECIAL_BIASED_END_STMT)
5814 /* We keep original statement only for the last one. All
5815 others are recreated. */
5816 gcc_assert (has_biased);
5817 gimple_assign_set_rhs1 (stmts[i], gimple_assign_lhs
5818 (stmts[last_rhs1_stmt_index]));
5819 gimple_assign_set_rhs2 (stmts[i], op1);
5820 update_stmt (stmts[i]);
5822 else
5824 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1),
5825 gimple_assign_lhs
5826 (stmts[i-width_active]),
5827 op1,
5828 opcode);
5829 gimple_set_visited (stmts[i], true);
5834 if (dump_file && (dump_flags & TDF_DETAILS))
5836 fprintf (dump_file, " into ");
5837 print_gimple_stmt (dump_file, stmts[i], 0);
5841 remove_visited_stmt_chain (last_rhs1);
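/* An informal sketch of the effect (operand names and the exact pairing are
   illustrative; they depend on the operand ranks): for a chain
     s = a + b + c + d + e + f
   rewritten with width 2 and no biased ops, the statements become roughly
     t1 = a + b;  t2 = c + d;  t3 = t1 + e;  t4 = t2 + f;  s = t3 + t4;
   i.e. two dependency chains that can execute in parallel, joined at the
   end by the original statement.  */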
5844 /* Transform STMT, which is really (A + B) + (C + D), into the left
5845 linear form, ((A+B)+C)+D.
5846 Recurse on D if necessary. */
5848 static void
5849 linearize_expr (gimple *stmt)
5851 gimple_stmt_iterator gsi;
5852 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
5853 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5854 gimple *oldbinrhs = binrhs;
5855 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5856 gimple *newbinrhs = NULL;
5857 class loop *loop = loop_containing_stmt (stmt);
5858 tree lhs = gimple_assign_lhs (stmt);
5860 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
5861 && is_reassociable_op (binrhs, rhscode, loop));
5863 gsi = gsi_for_stmt (stmt);
5865 gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
5866 binrhs = gimple_build_assign (make_ssa_name (TREE_TYPE (lhs)),
5867 gimple_assign_rhs_code (binrhs),
5868 gimple_assign_lhs (binlhs),
5869 gimple_assign_rhs2 (binrhs));
5870 gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));
5871 gsi_insert_before (&gsi, binrhs, GSI_SAME_STMT);
5872 gimple_set_uid (binrhs, gimple_uid (stmt));
5874 if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
5875 newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
5877 if (dump_file && (dump_flags & TDF_DETAILS))
5879 fprintf (dump_file, "Linearized: ");
5880 print_gimple_stmt (dump_file, stmt, 0);
5883 reassociate_stats.linearized++;
5884 update_stmt (stmt);
5886 gsi = gsi_for_stmt (oldbinrhs);
5887 reassoc_remove_stmt (&gsi);
5888 release_defs (oldbinrhs);
5890 gimple_set_visited (stmt, true);
5891 gimple_set_visited (binlhs, true);
5892 gimple_set_visited (binrhs, true);
5894 /* Tail recurse on the new rhs if it still needs reassociation. */
5895 if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
5896 /* ??? This should probably be linearize_expr (newbinrhs) but I don't
5897 want to change the algorithm while converting to tuples. */
5898 linearize_expr (stmt);
5901 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
5902 it. Otherwise, return NULL. */
5904 static gimple *
5905 get_single_immediate_use (tree lhs)
5907 use_operand_p immuse;
5908 gimple *immusestmt;
5910 if (TREE_CODE (lhs) == SSA_NAME
5911 && single_imm_use (lhs, &immuse, &immusestmt)
5912 && is_gimple_assign (immusestmt))
5913 return immusestmt;
5915 return NULL;
5918 /* Recursively negate the value of TONEGATE, and return the SSA_NAME
5919 representing the negated value. Insertions of any necessary
5920 instructions go before GSI.
5921 This function is recursive in that, if you hand it "a_5" as the
5922 value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
5923 transform b_3 + b_4 into a_5 = -b_3 + -b_4. */
5925 static tree
5926 negate_value (tree tonegate, gimple_stmt_iterator *gsip)
5928 gimple *negatedefstmt = NULL;
5929 tree resultofnegate;
5930 gimple_stmt_iterator gsi;
5931 unsigned int uid;
5933 /* If we are trying to negate a name, defined by an add, negate the
5934 add operands instead. */
5935 if (TREE_CODE (tonegate) == SSA_NAME)
5936 negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
5937 if (TREE_CODE (tonegate) == SSA_NAME
5938 && is_gimple_assign (negatedefstmt)
5939 && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
5940 && has_single_use (gimple_assign_lhs (negatedefstmt))
5941 && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
5943 tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
5944 tree rhs2 = gimple_assign_rhs2 (negatedefstmt);
5945 tree lhs = gimple_assign_lhs (negatedefstmt);
5946 gimple *g;
5948 gsi = gsi_for_stmt (negatedefstmt);
5949 rhs1 = negate_value (rhs1, &gsi);
5951 gsi = gsi_for_stmt (negatedefstmt);
5952 rhs2 = negate_value (rhs2, &gsi);
5954 gsi = gsi_for_stmt (negatedefstmt);
5955 lhs = make_ssa_name (TREE_TYPE (lhs));
5956 gimple_set_visited (negatedefstmt, true);
5957 g = gimple_build_assign (lhs, PLUS_EXPR, rhs1, rhs2);
5958 gimple_set_uid (g, gimple_uid (negatedefstmt));
5959 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5960 return lhs;
5963 tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
5964 resultofnegate = force_gimple_operand_gsi (gsip, tonegate, true,
5965 NULL_TREE, true, GSI_SAME_STMT);
5966 gsi = *gsip;
5967 uid = gimple_uid (gsi_stmt (gsi));
5968 for (gsi_prev (&gsi); !gsi_end_p (gsi); gsi_prev (&gsi))
5970 gimple *stmt = gsi_stmt (gsi);
5971 if (gimple_uid (stmt) != 0)
5972 break;
5973 gimple_set_uid (stmt, uid);
5975 return resultofnegate;
5978 /* Return true if we should break up the subtract in STMT into an add
5979 with negate. This is true when the subtract operands are really
5980 adds, or the subtract itself is used in an add expression. In
5981 either case, breaking up the subtract into an add with negate
5982 exposes the adds to reassociation. */
5984 static bool
5985 should_break_up_subtract (gimple *stmt)
5987 tree lhs = gimple_assign_lhs (stmt);
5988 tree binlhs = gimple_assign_rhs1 (stmt);
5989 tree binrhs = gimple_assign_rhs2 (stmt);
5990 gimple *immusestmt;
5991 class loop *loop = loop_containing_stmt (stmt);
5993 if (TREE_CODE (binlhs) == SSA_NAME
5994 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
5995 return true;
5997 if (TREE_CODE (binrhs) == SSA_NAME
5998 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
5999 return true;
6001 if (TREE_CODE (lhs) == SSA_NAME
6002 && (immusestmt = get_single_immediate_use (lhs))
6003 && is_gimple_assign (immusestmt)
6004 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
6005 || (gimple_assign_rhs_code (immusestmt) == MINUS_EXPR
6006 && gimple_assign_rhs1 (immusestmt) == lhs)
6007 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
6008 return true;
6009 return false;
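/* A small example of the predicate above and the transform below
   (hypothetical SSA names): for
     t_1 = a_2 - b_3;
     u_4 = t_1 + c_5;
   the subtract feeds an addition, so it is broken up into
     n_6 = -b_3;
     t_1 = a_2 + n_6;
   exposing the whole computation as a single addition chain.  */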
6012 /* Transform STMT from A - B into A + -B. */
6014 static void
6015 break_up_subtract (gimple *stmt, gimple_stmt_iterator *gsip)
6017 tree rhs1 = gimple_assign_rhs1 (stmt);
6018 tree rhs2 = gimple_assign_rhs2 (stmt);
6020 if (dump_file && (dump_flags & TDF_DETAILS))
6022 fprintf (dump_file, "Breaking up subtract ");
6023 print_gimple_stmt (dump_file, stmt, 0);
6026 rhs2 = negate_value (rhs2, gsip);
6027 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
6028 update_stmt (stmt);
6031 /* Determine whether STMT is a builtin call that raises an SSA name
6032 to an integer power and has only one use. If so, and this is early
6033 reassociation and unsafe math optimizations are permitted, place
6034 the SSA name in *BASE and the exponent in *EXPONENT, and return TRUE.
6035 If any of these conditions does not hold, return FALSE. */
6037 static bool
6038 acceptable_pow_call (gcall *stmt, tree *base, HOST_WIDE_INT *exponent)
6040 tree arg1;
6041 REAL_VALUE_TYPE c, cint;
6043 switch (gimple_call_combined_fn (stmt))
6045 CASE_CFN_POW:
6046 if (flag_errno_math)
6047 return false;
6049 *base = gimple_call_arg (stmt, 0);
6050 arg1 = gimple_call_arg (stmt, 1);
6052 if (TREE_CODE (arg1) != REAL_CST)
6053 return false;
6055 c = TREE_REAL_CST (arg1);
6057 if (REAL_EXP (&c) > HOST_BITS_PER_WIDE_INT)
6058 return false;
6060 *exponent = real_to_integer (&c);
6061 real_from_integer (&cint, VOIDmode, *exponent, SIGNED);
6062 if (!real_identical (&c, &cint))
6063 return false;
6065 break;
6067 CASE_CFN_POWI:
6068 *base = gimple_call_arg (stmt, 0);
6069 arg1 = gimple_call_arg (stmt, 1);
6071 if (!tree_fits_shwi_p (arg1))
6072 return false;
6074 *exponent = tree_to_shwi (arg1);
6075 break;
6077 default:
6078 return false;
6081 /* Expanding negative exponents is generally unproductive, so we don't
6082 complicate matters with those. Exponents of zero and one should
6083 have been handled by expression folding. */
6084 if (*exponent < 2 || TREE_CODE (*base) != SSA_NAME)
6085 return false;
6087 return true;
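/* As an informal example (assuming the errno/unsafe-math checks above pass):
   pow (x_1, 3.0) yields *BASE = x_1 and *EXPONENT = 3, whereas
   pow (x_1, 2.5) is rejected because the exponent is not an exact integer,
   and powi (x_1, -2) is rejected because negative exponents are not
   expanded here.  */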
6090 /* Try to derive and add operand entry for OP to *OPS. Return false if
6091 unsuccessful. */
6093 static bool
6094 try_special_add_to_ops (vec<operand_entry *> *ops,
6095 enum tree_code code,
6096 tree op, gimple* def_stmt)
6098 tree base = NULL_TREE;
6099 HOST_WIDE_INT exponent = 0;
6101 if (TREE_CODE (op) != SSA_NAME
6102 || ! has_single_use (op))
6103 return false;
6105 if (code == MULT_EXPR
6106 && reassoc_insert_powi_p
6107 && flag_unsafe_math_optimizations
6108 && is_gimple_call (def_stmt)
6109 && acceptable_pow_call (as_a <gcall *> (def_stmt), &base, &exponent))
6111 add_repeat_to_ops_vec (ops, base, exponent);
6112 gimple_set_visited (def_stmt, true);
6113 return true;
6115 else if (code == MULT_EXPR
6116 && is_gimple_assign (def_stmt)
6117 && gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6118 && !HONOR_SNANS (TREE_TYPE (op))
6119 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (op))
6120 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (op)))
6121 && (!FLOAT_TYPE_P (TREE_TYPE (op))
6122 || !DECIMAL_FLOAT_MODE_P (element_mode (op))))
6124 tree rhs1 = gimple_assign_rhs1 (def_stmt);
6125 tree cst = build_minus_one_cst (TREE_TYPE (op));
6126 add_to_ops_vec (ops, rhs1);
6127 add_to_ops_vec (ops, cst);
6128 gimple_set_visited (def_stmt, true);
6129 return true;
6132 return false;
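/* A sketch of the two cases handled above (hypothetical, single-use SSA
   names): for
     t_1 = __builtin_pow (x_2, 3.0);  y_3 = t_1 * z_4;
   the operand t_1 is recorded as a repeated factor x_2 with count 3 (when
   the powi conditions above hold); for
     t_1 = -x_2;  y_3 = t_1 * z_4;
   the operand t_1 is replaced by the two operands x_2 and -1.  */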
6135 /* Recursively linearize a binary expression that is the RHS of STMT.
6136 Place the operands of the expression tree in the vector named OPS. */
6138 static void
6139 linearize_expr_tree (vec<operand_entry *> *ops, gimple *stmt,
6140 bool is_associative, bool set_visited)
6142 tree binlhs = gimple_assign_rhs1 (stmt);
6143 tree binrhs = gimple_assign_rhs2 (stmt);
6144 gimple *binlhsdef = NULL, *binrhsdef = NULL;
6145 bool binlhsisreassoc = false;
6146 bool binrhsisreassoc = false;
6147 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
6148 class loop *loop = loop_containing_stmt (stmt);
6150 if (set_visited)
6151 gimple_set_visited (stmt, true);
6153 if (TREE_CODE (binlhs) == SSA_NAME)
6155 binlhsdef = SSA_NAME_DEF_STMT (binlhs);
6156 binlhsisreassoc = (is_reassociable_op (binlhsdef, rhscode, loop)
6157 && !stmt_could_throw_p (cfun, binlhsdef));
6160 if (TREE_CODE (binrhs) == SSA_NAME)
6162 binrhsdef = SSA_NAME_DEF_STMT (binrhs);
6163 binrhsisreassoc = (is_reassociable_op (binrhsdef, rhscode, loop)
6164 && !stmt_could_throw_p (cfun, binrhsdef));
6167 /* If the LHS is not reassociable, but the RHS is, we need to swap
6168 them. If neither is reassociable, there is nothing we can do, so
6169 just put them in the ops vector. If the LHS is reassociable,
6170 linearize it. If both are reassociable, then linearize the RHS
6171 and the LHS. */
6173 if (!binlhsisreassoc)
6175 /* If this is not an associative operation, such as division, give up. */
6176 if (!is_associative)
6178 add_to_ops_vec (ops, binrhs);
6179 return;
6182 if (!binrhsisreassoc)
6184 bool swap = false;
6185 if (try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6186 /* If we add ops for the rhs we expect to be able to recurse
6187 to it via the lhs during expression rewrite so swap
6188 operands. */
6189 swap = true;
6190 else
6191 add_to_ops_vec (ops, binrhs);
6193 if (!try_special_add_to_ops (ops, rhscode, binlhs, binlhsdef))
6194 add_to_ops_vec (ops, binlhs);
6196 if (!swap)
6197 return;
6200 if (dump_file && (dump_flags & TDF_DETAILS))
6202 fprintf (dump_file, "swapping operands of ");
6203 print_gimple_stmt (dump_file, stmt, 0);
6206 swap_ssa_operands (stmt,
6207 gimple_assign_rhs1_ptr (stmt),
6208 gimple_assign_rhs2_ptr (stmt));
6209 update_stmt (stmt);
6211 if (dump_file && (dump_flags & TDF_DETAILS))
6213 fprintf (dump_file, " is now ");
6214 print_gimple_stmt (dump_file, stmt, 0);
6216 if (!binrhsisreassoc)
6217 return;
6219 /* We want to make it so the lhs is always the reassociative op,
6220 so swap. */
6221 std::swap (binlhs, binrhs);
6223 else if (binrhsisreassoc)
6225 linearize_expr (stmt);
6226 binlhs = gimple_assign_rhs1 (stmt);
6227 binrhs = gimple_assign_rhs2 (stmt);
6230 gcc_assert (TREE_CODE (binrhs) != SSA_NAME
6231 || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
6232 rhscode, loop));
6233 linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
6234 is_associative, set_visited);
6236 if (!try_special_add_to_ops (ops, rhscode, binrhs, binrhsdef))
6237 add_to_ops_vec (ops, binrhs);
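/* A small illustration (hypothetical SSA names): starting from
     t_1 = a_2 + b_3;  t_4 = c_5 + d_6;  s_7 = t_1 + t_4;
   the code above first re-linearizes the tree into a left-linear chain and
   then fills OPS with the four leaf operands a_2, b_3, c_5 and d_6 (in some
   order), ready for sorting and rewriting.  */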
6240 /* Repropagate the negates back into subtracts, since no other pass
6241 currently does it. */
6243 static void
6244 repropagate_negates (void)
6246 unsigned int i = 0;
6247 tree negate;
6249 FOR_EACH_VEC_ELT (plus_negates, i, negate)
6251 gimple *user = get_single_immediate_use (negate);
6252 if (!user || !is_gimple_assign (user))
6253 continue;
6255 tree negateop = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (negate));
6256 if (TREE_CODE (negateop) == SSA_NAME
6257 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (negateop))
6258 continue;
6260 /* The negate operand can be either operand of a PLUS_EXPR
6261 (it can be the LHS if the RHS is a constant for example).
6263 Force the negate operand to the RHS of the PLUS_EXPR, then
6264 transform the PLUS_EXPR into a MINUS_EXPR. */
6265 if (gimple_assign_rhs_code (user) == PLUS_EXPR)
6267 /* If the negated operand appears on the LHS of the
6268 PLUS_EXPR, exchange the operands of the PLUS_EXPR
6269 to force the negated operand to the RHS of the PLUS_EXPR. */
6270 if (gimple_assign_rhs1 (user) == negate)
6272 swap_ssa_operands (user,
6273 gimple_assign_rhs1_ptr (user),
6274 gimple_assign_rhs2_ptr (user));
6277 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
6278 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
6279 if (gimple_assign_rhs2 (user) == negate)
6281 tree rhs1 = gimple_assign_rhs1 (user);
6282 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6283 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1,
6284 negateop);
6285 update_stmt (user);
6288 else if (gimple_assign_rhs_code (user) == MINUS_EXPR)
6290 if (gimple_assign_rhs1 (user) == negate)
6292 /* We have
6293 x = -negateop
6294 y = x - b
6295 which we transform into
6296 x = negateop + b
6297 y = -x .
6298 This pushes down the negate which we possibly can merge
6299 into some other operation, hence insert it into the
6300 plus_negates vector. */
6301 gimple *feed = SSA_NAME_DEF_STMT (negate);
6302 tree b = gimple_assign_rhs2 (user);
6303 gimple_stmt_iterator gsi = gsi_for_stmt (feed);
6304 gimple_stmt_iterator gsi2 = gsi_for_stmt (user);
6305 tree x = make_ssa_name (TREE_TYPE (gimple_assign_lhs (feed)));
6306 gimple *g = gimple_build_assign (x, PLUS_EXPR, negateop, b);
6307 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
6308 gimple_assign_set_rhs_with_ops (&gsi2, NEGATE_EXPR, x);
6309 user = gsi_stmt (gsi2);
6310 update_stmt (user);
6311 reassoc_remove_stmt (&gsi);
6312 release_defs (feed);
6313 plus_negates.safe_push (gimple_assign_lhs (user));
6315 else
6317 /* Transform "x = -negateop; y = b - x" into "y = b + negateop",
6318 getting rid of one operation. */
6319 tree rhs1 = gimple_assign_rhs1 (user);
6320 gimple_stmt_iterator gsi = gsi_for_stmt (user);
6321 gimple_assign_set_rhs_with_ops (&gsi, PLUS_EXPR, rhs1, negateop);
6322 update_stmt (gsi_stmt (gsi));
6328 /* Break up subtract operations in block BB.
6330 We do this top down because we don't know whether the subtract is
6331 part of a possible chain of reassociation except at the top.
6333 IE given
6334 d = f + g
6335 c = a + e
6336 b = c - d
6337 q = b - r
6338 k = t - q
6340 we want to break up k = t - q, but we won't until we've transformed q
6341 = b - r, which won't be broken up until we transform b = c - d.
6343 En passant, clear the GIMPLE visited flag on every statement
6344 and set UIDs within each basic block. */
6346 static void
6347 break_up_subtract_bb (basic_block bb)
6349 gimple_stmt_iterator gsi;
6350 unsigned int uid = 1;
6352 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
6354 gimple *stmt = gsi_stmt (gsi);
6355 gimple_set_visited (stmt, false);
6356 gimple_set_uid (stmt, uid++);
6358 if (!is_gimple_assign (stmt)
6359 || !can_reassociate_type_p (TREE_TYPE (gimple_assign_lhs (stmt)))
6360 || !can_reassociate_op_p (gimple_assign_lhs (stmt)))
6361 continue;
6363 /* Look for simple gimple subtract operations. */
6364 if (gimple_assign_rhs_code (stmt) == MINUS_EXPR)
6366 if (!can_reassociate_op_p (gimple_assign_rhs1 (stmt))
6367 || !can_reassociate_op_p (gimple_assign_rhs2 (stmt)))
6368 continue;
6370 /* Check for a subtract used only in an addition. If this
6371 is the case, transform it into add of a negate for better
6372 reassociation. IE transform C = A-B into C = A + -B if C
6373 is only used in an addition. */
6374 if (should_break_up_subtract (stmt))
6375 break_up_subtract (stmt, &gsi);
6377 else if (gimple_assign_rhs_code (stmt) == NEGATE_EXPR
6378 && can_reassociate_op_p (gimple_assign_rhs1 (stmt)))
6379 plus_negates.safe_push (gimple_assign_lhs (stmt));
6383 /* Used for repeated factor analysis. */
6384 struct repeat_factor
6386 /* An SSA name that occurs in a multiply chain. */
6387 tree factor;
6389 /* Cached rank of the factor. */
6390 unsigned rank;
6392 /* Number of occurrences of the factor in the chain. */
6393 HOST_WIDE_INT count;
6395 /* An SSA name representing the product of this factor and
6396 all factors appearing later in the repeated factor vector. */
6397 tree repr;
6401 static vec<repeat_factor> repeat_factor_vec;
6403 /* Used for sorting the repeat factor vector. Sort primarily by
6404 ascending occurrence count, secondarily by descending rank. */
6406 static int
6407 compare_repeat_factors (const void *x1, const void *x2)
6409 const repeat_factor *rf1 = (const repeat_factor *) x1;
6410 const repeat_factor *rf2 = (const repeat_factor *) x2;
6412 if (rf1->count < rf2->count)
6413 return -1;
6414 else if (rf1->count > rf2->count)
6415 return 1;
6417 if (rf1->rank < rf2->rank)
6418 return 1;
6419 else if (rf1->rank > rf2->rank)
6420 return -1;
6422 return 0;
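/* For instance, given the entries {x_1: count 3, rank 4} and
   {y_2: count 2, rank 9} (hypothetical values), y_2 sorts before x_1
   because its count is lower; for equal counts the higher-ranked entry
   sorts first.  */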
6425 /* Look for repeated operands in OPS in the multiply tree rooted at
6426 STMT. Replace them with an optimal sequence of multiplies and powi
6427 builtin calls, and remove the used operands from OPS. Return an
6428 SSA name representing the value of the replacement sequence. */
6430 static tree
6431 attempt_builtin_powi (gimple *stmt, vec<operand_entry *> *ops)
6433 unsigned i, j, vec_len;
6434 int ii;
6435 operand_entry *oe;
6436 repeat_factor *rf1, *rf2;
6437 repeat_factor rfnew;
6438 tree result = NULL_TREE;
6439 tree target_ssa, iter_result;
6440 tree type = TREE_TYPE (gimple_get_lhs (stmt));
6441 tree powi_fndecl = mathfn_built_in (type, BUILT_IN_POWI);
6442 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
6443 gimple *mul_stmt, *pow_stmt;
6445 /* Nothing to do if BUILT_IN_POWI doesn't exist for this type and
6446 target, unless type is integral. */
6447 if (!powi_fndecl && !INTEGRAL_TYPE_P (type))
6448 return NULL_TREE;
6450 /* Allocate the repeated factor vector. */
6451 repeat_factor_vec.create (10);
6453 /* Scan the OPS vector for all SSA names in the product and build
6454 up a vector of occurrence counts for each factor. */
6455 FOR_EACH_VEC_ELT (*ops, i, oe)
6457 if (TREE_CODE (oe->op) == SSA_NAME)
6459 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6461 if (rf1->factor == oe->op)
6463 rf1->count += oe->count;
6464 break;
6468 if (j >= repeat_factor_vec.length ())
6470 rfnew.factor = oe->op;
6471 rfnew.rank = oe->rank;
6472 rfnew.count = oe->count;
6473 rfnew.repr = NULL_TREE;
6474 repeat_factor_vec.safe_push (rfnew);
6479 /* Sort the repeated factor vector by (a) increasing occurrence count,
6480 and (b) decreasing rank. */
6481 repeat_factor_vec.qsort (compare_repeat_factors);
6483 /* It is generally best to combine as many base factors as possible
6484 into a product before applying __builtin_powi to the result.
6485 However, the sort order chosen for the repeated factor vector
6486 allows us to cache partial results for the product of the base
6487 factors for subsequent use. When we already have a cached partial
6488 result from a previous iteration, it is best to make use of it
6489 before looking for another __builtin_pow opportunity.
6491 As an example, consider x * x * y * y * y * z * z * z * z.
6492 We want to first compose the product x * y * z, raise it to the
6493 second power, then multiply this by y * z, and finally multiply
6494 by z. This can be done in 5 multiplies provided we cache y * z
6495 for use in both expressions:
6497 t1 = y * z
6498 t2 = t1 * x
6499 t3 = t2 * t2
6500 t4 = t1 * t3
6501 result = t4 * z
6503 If we instead ignored the cached y * z and first multiplied by
6504 the __builtin_pow opportunity z * z, we would get the inferior:
6506 t1 = y * z
6507 t2 = t1 * x
6508 t3 = t2 * t2
6509 t4 = z * z
6510 t5 = t3 * t4
6511 result = t5 * y */
6513 vec_len = repeat_factor_vec.length ();
6515 /* Repeatedly look for opportunities to create a builtin_powi call. */
6516 while (true)
6518 HOST_WIDE_INT power;
6520 /* First look for the largest cached product of factors from
6521 preceding iterations. If found, create a builtin_powi for
6522 it if the minimum occurrence count for its factors is at
6523 least 2, or just use this cached product as our next
6524 multiplicand if the minimum occurrence count is 1. */
6525 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6527 if (rf1->repr && rf1->count > 0)
6528 break;
6531 if (j < vec_len)
6533 power = rf1->count;
6535 if (power == 1)
6537 iter_result = rf1->repr;
6539 if (dump_file && (dump_flags & TDF_DETAILS))
6541 unsigned elt;
6542 repeat_factor *rf;
6543 fputs ("Multiplying by cached product ", dump_file);
6544 for (elt = j; elt < vec_len; elt++)
6546 rf = &repeat_factor_vec[elt];
6547 print_generic_expr (dump_file, rf->factor);
6548 if (elt < vec_len - 1)
6549 fputs (" * ", dump_file);
6551 fputs ("\n", dump_file);
6554 else
6556 if (INTEGRAL_TYPE_P (type))
6558 gcc_assert (power > 1);
6559 gimple_stmt_iterator gsip = gsi;
6560 gsi_prev (&gsip);
6561 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6562 rf1->repr, power);
6563 gimple_stmt_iterator gsic = gsi;
6564 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6566 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6567 gimple_set_visited (gsi_stmt (gsic), true);
6568 gsi_prev (&gsic);
6571 else
6573 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6574 pow_stmt
6575 = gimple_build_call (powi_fndecl, 2, rf1->repr,
6576 build_int_cst (integer_type_node,
6577 power));
6578 gimple_call_set_lhs (pow_stmt, iter_result);
6579 gimple_set_location (pow_stmt, gimple_location (stmt));
6580 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6581 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6584 if (dump_file && (dump_flags & TDF_DETAILS))
6586 unsigned elt;
6587 repeat_factor *rf;
6588 fputs ("Building __builtin_pow call for cached product (",
6589 dump_file);
6590 for (elt = j; elt < vec_len; elt++)
6592 rf = &repeat_factor_vec[elt];
6593 print_generic_expr (dump_file, rf->factor);
6594 if (elt < vec_len - 1)
6595 fputs (" * ", dump_file);
6597 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n",
6598 power);
6602 else
6604 /* Otherwise, find the first factor in the repeated factor
6605 vector whose occurrence count is at least 2. If no such
6606 factor exists, there are no builtin_powi opportunities
6607 remaining. */
6608 FOR_EACH_VEC_ELT (repeat_factor_vec, j, rf1)
6610 if (rf1->count >= 2)
6611 break;
6614 if (j >= vec_len)
6615 break;
6617 power = rf1->count;
6619 if (dump_file && (dump_flags & TDF_DETAILS))
6621 unsigned elt;
6622 repeat_factor *rf;
6623 fputs ("Building __builtin_pow call for (", dump_file);
6624 for (elt = j; elt < vec_len; elt++)
6626 rf = &repeat_factor_vec[elt];
6627 print_generic_expr (dump_file, rf->factor);
6628 if (elt < vec_len - 1)
6629 fputs (" * ", dump_file);
6631 fprintf (dump_file, ")^" HOST_WIDE_INT_PRINT_DEC"\n", power);
6634 reassociate_stats.pows_created++;
6636 /* Visit each element of the vector in reverse order (so that
6637 high-occurrence elements are visited first, and within the
6638 same occurrence count, lower-ranked elements are visited
6639 first). Form a linear product of all elements in this order
6640 whose occurrence count is at least that of element J.
6641 Record the SSA name representing the product of each element
6642 with all subsequent elements in the vector. */
6643 if (j == vec_len - 1)
6644 rf1->repr = rf1->factor;
6645 else
6647 for (ii = vec_len - 2; ii >= (int)j; ii--)
6649 tree op1, op2;
6651 rf1 = &repeat_factor_vec[ii];
6652 rf2 = &repeat_factor_vec[ii + 1];
6654 /* Init the last factor's representative to be itself. */
6655 if (!rf2->repr)
6656 rf2->repr = rf2->factor;
6658 op1 = rf1->factor;
6659 op2 = rf2->repr;
6661 target_ssa = make_temp_ssa_name (type, NULL, "reassocpow");
6662 mul_stmt = gimple_build_assign (target_ssa, MULT_EXPR,
6663 op1, op2);
6664 gimple_set_location (mul_stmt, gimple_location (stmt));
6665 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6666 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6667 rf1->repr = target_ssa;
6669 /* Don't reprocess the multiply we just introduced. */
6670 gimple_set_visited (mul_stmt, true);
6674 /* Form a call to __builtin_powi for the maximum product
6675 just formed, raised to the power obtained earlier. */
6676 rf1 = &repeat_factor_vec[j];
6677 if (INTEGRAL_TYPE_P (type))
6679 gcc_assert (power > 1);
6680 gimple_stmt_iterator gsip = gsi;
6681 gsi_prev (&gsip);
6682 iter_result = powi_as_mults (&gsi, gimple_location (stmt),
6683 rf1->repr, power);
6684 gimple_stmt_iterator gsic = gsi;
6685 while (gsi_stmt (gsic) != gsi_stmt (gsip))
6687 gimple_set_uid (gsi_stmt (gsic), gimple_uid (stmt));
6688 gimple_set_visited (gsi_stmt (gsic), true);
6689 gsi_prev (&gsic);
6692 else
6694 iter_result = make_temp_ssa_name (type, NULL, "reassocpow");
6695 pow_stmt = gimple_build_call (powi_fndecl, 2, rf1->repr,
6696 build_int_cst (integer_type_node,
6697 power));
6698 gimple_call_set_lhs (pow_stmt, iter_result);
6699 gimple_set_location (pow_stmt, gimple_location (stmt));
6700 gimple_set_uid (pow_stmt, gimple_uid (stmt));
6701 gsi_insert_before (&gsi, pow_stmt, GSI_SAME_STMT);
6705 /* If we previously formed at least one other builtin_powi call,
6706 form the product of this one and those others. */
6707 if (result)
6709 tree new_result = make_temp_ssa_name (type, NULL, "reassocpow");
6710 mul_stmt = gimple_build_assign (new_result, MULT_EXPR,
6711 result, iter_result);
6712 gimple_set_location (mul_stmt, gimple_location (stmt));
6713 gimple_set_uid (mul_stmt, gimple_uid (stmt));
6714 gsi_insert_before (&gsi, mul_stmt, GSI_SAME_STMT);
6715 gimple_set_visited (mul_stmt, true);
6716 result = new_result;
6718 else
6719 result = iter_result;
6721 /* Decrement the occurrence count of each element in the product
6722 by the count found above, and remove this many copies of each
6723 factor from OPS. */
6724 for (i = j; i < vec_len; i++)
6726 unsigned k = power;
6727 unsigned n;
6729 rf1 = &repeat_factor_vec[i];
6730 rf1->count -= power;
6732 FOR_EACH_VEC_ELT_REVERSE (*ops, n, oe)
6734 if (oe->op == rf1->factor)
6736 if (oe->count <= k)
6738 ops->ordered_remove (n);
6739 k -= oe->count;
6741 if (k == 0)
6742 break;
6744 else
6746 oe->count -= k;
6747 break;
6754 /* At this point all elements in the repeated factor vector have a
6755 remaining occurrence count of 0 or 1, and those with a count of 1
6756 don't have cached representatives. Re-sort the ops vector and
6757 clean up. */
6758 ops->qsort (sort_by_operand_rank);
6759 repeat_factor_vec.release ();
6761 /* Return the final product computed herein. Note that there may
6762 still be some elements with single occurrence count left in OPS;
6763 those will be handled by the normal reassociation logic. */
6764 return result;
6767 /* Attempt to optimize
6768 CST1 * copysign (CST2, y) -> copysign (CST1 * CST2, y) if CST1 > 0, or
6769 CST1 * copysign (CST2, y) -> -copysign (CST1 * CST2, y) if CST1 < 0. */
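/* With illustrative constants: 2.0 * copysign (3.0, y) becomes
   copysign (6.0, y), while -2.0 * copysign (3.0, y) becomes
   -copysign (-6.0, y), i.e. the call with the folded constant, negated.  */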
6771 static void
6772 attempt_builtin_copysign (vec<operand_entry *> *ops)
6774 operand_entry *oe;
6775 unsigned int i;
6776 unsigned int length = ops->length ();
6777 tree cst = ops->last ()->op;
6779 if (length == 1 || TREE_CODE (cst) != REAL_CST)
6780 return;
6782 FOR_EACH_VEC_ELT (*ops, i, oe)
6784 if (TREE_CODE (oe->op) == SSA_NAME
6785 && has_single_use (oe->op))
6787 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6788 if (gcall *old_call = dyn_cast <gcall *> (def_stmt))
6790 tree arg0, arg1;
6791 switch (gimple_call_combined_fn (old_call))
6793 CASE_CFN_COPYSIGN:
6794 CASE_CFN_COPYSIGN_FN:
6795 arg0 = gimple_call_arg (old_call, 0);
6796 arg1 = gimple_call_arg (old_call, 1);
6797 /* The first argument of copysign must be a constant,
6798 otherwise there's nothing to do. */
6799 if (TREE_CODE (arg0) == REAL_CST)
6801 tree type = TREE_TYPE (arg0);
6802 tree mul = const_binop (MULT_EXPR, type, cst, arg0);
6803 /* If we couldn't fold to a single constant, skip it.
6804 That happens e.g. for inexact multiplication when
6805 -frounding-math. */
6806 if (mul == NULL_TREE)
6807 break;
6808 /* Instead of adjusting OLD_CALL, let's build a new
6809 call to not leak the LHS and prevent keeping bogus
6810 debug statements. DCE will clean up the old call. */
6811 gcall *new_call;
6812 if (gimple_call_internal_p (old_call))
6813 new_call = gimple_build_call_internal
6814 (IFN_COPYSIGN, 2, mul, arg1);
6815 else
6816 new_call = gimple_build_call
6817 (gimple_call_fndecl (old_call), 2, mul, arg1);
6818 tree lhs = make_ssa_name (type);
6819 gimple_call_set_lhs (new_call, lhs);
6820 gimple_set_location (new_call,
6821 gimple_location (old_call));
6822 insert_stmt_after (new_call, old_call);
6823 /* We've used the constant, get rid of it. */
6824 ops->pop ();
6825 bool cst1_neg = real_isneg (TREE_REAL_CST_PTR (cst));
6826 /* Handle the CST1 < 0 case by negating the result. */
6827 if (cst1_neg)
6829 tree negrhs = make_ssa_name (TREE_TYPE (lhs));
6830 gimple *negate_stmt
6831 = gimple_build_assign (negrhs, NEGATE_EXPR, lhs);
6832 insert_stmt_after (negate_stmt, new_call);
6833 oe->op = negrhs;
6835 else
6836 oe->op = lhs;
6837 if (dump_file && (dump_flags & TDF_DETAILS))
6839 fprintf (dump_file, "Optimizing copysign: ");
6840 print_generic_expr (dump_file, cst);
6841 fprintf (dump_file, " * COPYSIGN (");
6842 print_generic_expr (dump_file, arg0);
6843 fprintf (dump_file, ", ");
6844 print_generic_expr (dump_file, arg1);
6845 fprintf (dump_file, ") into %sCOPYSIGN (",
6846 cst1_neg ? "-" : "");
6847 print_generic_expr (dump_file, mul);
6848 fprintf (dump_file, ", ");
6849 print_generic_expr (dump_file, arg1);
6850 fprintf (dump_file, ")\n");
6852 return;
6854 break;
6855 default:
6856 break;
6863 /* Transform STMT at *GSI into a copy by replacing its rhs with NEW_RHS. */
6865 static void
6866 transform_stmt_to_copy (gimple_stmt_iterator *gsi, gimple *stmt, tree new_rhs)
6868 tree rhs1;
6870 if (dump_file && (dump_flags & TDF_DETAILS))
6872 fprintf (dump_file, "Transforming ");
6873 print_gimple_stmt (dump_file, stmt, 0);
6876 rhs1 = gimple_assign_rhs1 (stmt);
6877 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
6878 update_stmt (stmt);
6879 remove_visited_stmt_chain (rhs1);
6881 if (dump_file && (dump_flags & TDF_DETAILS))
6883 fprintf (dump_file, " into ");
6884 print_gimple_stmt (dump_file, stmt, 0);
6888 /* Transform STMT at *GSI into a multiply of RHS1 and RHS2. */
6890 static void
6891 transform_stmt_to_multiply (gimple_stmt_iterator *gsi, gimple *stmt,
6892 tree rhs1, tree rhs2)
6894 if (dump_file && (dump_flags & TDF_DETAILS))
6896 fprintf (dump_file, "Transforming ");
6897 print_gimple_stmt (dump_file, stmt, 0);
6900 gimple_assign_set_rhs_with_ops (gsi, MULT_EXPR, rhs1, rhs2);
6901 update_stmt (gsi_stmt (*gsi));
6902 remove_visited_stmt_chain (rhs1);
6904 if (dump_file && (dump_flags & TDF_DETAILS))
6906 fprintf (dump_file, " into ");
6907 print_gimple_stmt (dump_file, stmt, 0);
6911 /* Rearranging ops can expose more FMAs when the chain has more than 2 FMAs.
6912 Put no-mult ops and mult ops alternately at the end of the queue, which is
6913 conducive to generating more FMAs and reducing the loss of FMAs when breaking
6914 the chain.
6915 E.g.
6916 a * b + c * d + e generates:
6918 _4 = c_9(D) * d_10(D);
6919 _12 = .FMA (a_7(D), b_8(D), _4);
6920 _11 = e_6(D) + _12;
6922 Rearrange ops to -> e + a * b + c * d generates:
6924 _4 = .FMA (c_7(D), d_8(D), _3);
6925 _11 = .FMA (a_5(D), b_6(D), _4);
6927 Return the number of MULT_EXPRs in the chain. */
6928 static int
6929 rank_ops_for_fma (vec<operand_entry *> *ops)
6931 operand_entry *oe;
6932 unsigned int i;
6933 unsigned int ops_length = ops->length ();
6934 auto_vec<operand_entry *> ops_mult;
6935 auto_vec<operand_entry *> ops_others;
6937 FOR_EACH_VEC_ELT (*ops, i, oe)
6939 if (TREE_CODE (oe->op) == SSA_NAME)
6941 gimple *def_stmt = SSA_NAME_DEF_STMT (oe->op);
6942 if (is_gimple_assign (def_stmt))
6944 if (gimple_assign_rhs_code (def_stmt) == MULT_EXPR)
6945 ops_mult.safe_push (oe);
6946 /* A negate on the multiplication leads to FNMA. */
6947 else if (gimple_assign_rhs_code (def_stmt) == NEGATE_EXPR
6948 && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
6950 gimple *neg_def_stmt
6951 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (def_stmt));
6952 if (is_gimple_assign (neg_def_stmt)
6953 && gimple_bb (neg_def_stmt) == gimple_bb (def_stmt)
6954 && gimple_assign_rhs_code (neg_def_stmt) == MULT_EXPR)
6955 ops_mult.safe_push (oe);
6956 else
6957 ops_others.safe_push (oe);
6959 else
6960 ops_others.safe_push (oe);
6962 else
6963 ops_others.safe_push (oe);
6965 else
6966 ops_others.safe_push (oe);
6968 /* 1. When ops_mult.length == 2, like the following case,
6970 a * b + c * d + e.
6972 we need to rearrange the ops.
6974 Putting ops that are not defined by a mult in front can generate more FMAs.
6976 2. If all ops are defined with mult, we don't need to rearrange them. */
6977 unsigned mult_num = ops_mult.length ();
6978 if (mult_num >= 2 && mult_num != ops_length)
6980 /* Put no-mult ops and mult ops alternately at the end of the
6981 queue, which is conducive to generating more FMAs and reducing the
6982 loss of FMAs when breaking the chain. */
6983 ops->truncate (0);
6984 ops->splice (ops_mult);
6985 int j, opindex = ops->length ();
6986 int others_length = ops_others.length ();
6987 for (j = 0; j < others_length; j++)
6989 oe = ops_others.pop ();
6990 ops->quick_insert (opindex, oe);
6991 if (opindex > 0)
6992 opindex--;
6995 return mult_num;
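/* To make the interleaving concrete (hypothetical operands): with
   ops_mult == {m1, m2} and ops_others == {a, b}, the loop above rebuilds
   OPS as {m1, a, m2, b}, so mult and no-mult operands alternate at the end
   of the queue as described in the function comment.  */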
6997 /* Reassociate expressions in basic block BB and its post-dominator
6998 children.
7000 Bubble up return status from maybe_optimize_range_tests. */
7002 static bool
7003 reassociate_bb (basic_block bb)
7005 gimple_stmt_iterator gsi;
7006 gimple *stmt = last_nondebug_stmt (bb);
7007 bool cfg_cleanup_needed = false;
7009 if (stmt && !gimple_visited_p (stmt))
7010 cfg_cleanup_needed |= maybe_optimize_range_tests (stmt);
7012 bool do_prev = false;
7013 for (gsi = gsi_last_bb (bb);
7014 !gsi_end_p (gsi); do_prev ? gsi_prev (&gsi) : (void) 0)
7016 do_prev = true;
7017 stmt = gsi_stmt (gsi);
7019 if (is_gimple_assign (stmt)
7020 && !stmt_could_throw_p (cfun, stmt))
7022 tree lhs, rhs1, rhs2;
7023 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
7025 /* If this was part of an already processed statement,
7026 we don't need to touch it again. */
7027 if (gimple_visited_p (stmt))
7029 /* This statement might have become dead because of previous
7030 reassociations. */
7031 if (has_zero_uses (gimple_get_lhs (stmt)))
7033 reassoc_remove_stmt (&gsi);
7034 release_defs (stmt);
7035 /* We might end up removing the last stmt above which
7036 places the iterator to the end of the sequence.
7037 Reset it to the last stmt in this case and make sure
7038 we don't do gsi_prev in that case. */
7039 if (gsi_end_p (gsi))
7041 gsi = gsi_last_bb (bb);
7042 do_prev = false;
7045 continue;
7048 /* If this is not a gimple binary expression, there is
7049 nothing for us to do with it. */
7050 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
7051 continue;
7053 lhs = gimple_assign_lhs (stmt);
7054 rhs1 = gimple_assign_rhs1 (stmt);
7055 rhs2 = gimple_assign_rhs2 (stmt);
7057 /* For non-bit or min/max operations we can't associate
7058 all types. Verify that here. */
7059 if ((rhs_code != BIT_IOR_EXPR
7060 && rhs_code != BIT_AND_EXPR
7061 && rhs_code != BIT_XOR_EXPR
7062 && rhs_code != MIN_EXPR
7063 && rhs_code != MAX_EXPR
7064 && !can_reassociate_type_p (TREE_TYPE (lhs)))
7065 || !can_reassociate_op_p (rhs1)
7066 || !can_reassociate_op_p (rhs2))
7067 continue;
7069 if (associative_tree_code (rhs_code))
7071 auto_vec<operand_entry *> ops;
7072 tree powi_result = NULL_TREE;
7073 bool is_vector = VECTOR_TYPE_P (TREE_TYPE (lhs));
7075 /* There may be no immediate uses left by the time we
7076 get here because we may have eliminated them all. */
7077 if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
7078 continue;
7080 gimple_set_visited (stmt, true);
7081 linearize_expr_tree (&ops, stmt, true, true);
7082 ops.qsort (sort_by_operand_rank);
7083 int orig_len = ops.length ();
7084 optimize_ops_list (rhs_code, &ops);
7085 if (undistribute_ops_list (rhs_code, &ops,
7086 loop_containing_stmt (stmt)))
7088 ops.qsort (sort_by_operand_rank);
7089 optimize_ops_list (rhs_code, &ops);
7091 if (undistribute_bitref_for_vector (rhs_code, &ops,
7092 loop_containing_stmt (stmt)))
7094 ops.qsort (sort_by_operand_rank);
7095 optimize_ops_list (rhs_code, &ops);
7097 if (rhs_code == PLUS_EXPR
7098 && transform_add_to_multiply (&ops))
7099 ops.qsort (sort_by_operand_rank);
7101 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
7103 if (is_vector)
7104 optimize_vec_cond_expr (rhs_code, &ops);
7105 else
7106 optimize_range_tests (rhs_code, &ops, NULL);
7109 if (rhs_code == MULT_EXPR && !is_vector)
7111 attempt_builtin_copysign (&ops);
7113 if (reassoc_insert_powi_p
7114 && (flag_unsafe_math_optimizations
7115 || (INTEGRAL_TYPE_P (TREE_TYPE (lhs)))))
7116 powi_result = attempt_builtin_powi (stmt, &ops);
7119 operand_entry *last;
7120 bool negate_result = false;
7121 if (ops.length () > 1
7122 && rhs_code == MULT_EXPR)
7124 last = ops.last ();
7125 if ((integer_minus_onep (last->op)
7126 || real_minus_onep (last->op))
7127 && !HONOR_SNANS (TREE_TYPE (lhs))
7128 && (!HONOR_SIGNED_ZEROS (TREE_TYPE (lhs))
7129 || !COMPLEX_FLOAT_TYPE_P (TREE_TYPE (lhs))))
7131 ops.pop ();
7132 negate_result = true;
7136 tree new_lhs = lhs;
7137 /* If the operand vector is now empty, all operands were
7138 consumed by the __builtin_powi optimization. */
7139 if (ops.length () == 0)
7140 transform_stmt_to_copy (&gsi, stmt, powi_result);
7141 else if (ops.length () == 1)
7143 tree last_op = ops.last ()->op;
7145 /* If the stmt that defines operand has to be inserted, insert it
7146 before the use. */
7147 if (ops.last ()->stmt_to_insert)
7148 insert_stmt_before_use (stmt, ops.last ()->stmt_to_insert);
7149 if (powi_result)
7150 transform_stmt_to_multiply (&gsi, stmt, last_op,
7151 powi_result);
7152 else
7153 transform_stmt_to_copy (&gsi, stmt, last_op);
7155 else
7157 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
7158 int ops_num = ops.length ();
7159 int width = 0;
7160 int mult_num = 0;
7162 /* For binary bit operations, if there are at least 3
7163 operands and the last operand in OPS is a constant,
7164 move it to the front. This helps ensure that we generate
7165 (X & Y) & C rather than (X & C) & Y. The former will
7166 often match a canonical bit test when we get to RTL. */
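/* For example (hypothetical operands), for x & y & 0x10 the constant is
   swapped to the front of OPS so that we end up generating
   (x & y) & 0x10 rather than (x & 0x10) & y.  */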
7167 if (ops.length () > 2
7168 && (rhs_code == BIT_AND_EXPR
7169 || rhs_code == BIT_IOR_EXPR
7170 || rhs_code == BIT_XOR_EXPR)
7171 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
7172 std::swap (*ops[0], *ops[ops_num - 1]);
7174 optimization_type opt_type = bb_optimization_type (bb);
7176 /* If the target supports FMA, rank_ops_for_fma will detect whether
7177 the chain has FMAs and rearrange the ops if so.
7178 if (direct_internal_fn_supported_p (IFN_FMA,
7179 TREE_TYPE (lhs),
7180 opt_type)
7181 && (rhs_code == PLUS_EXPR || rhs_code == MINUS_EXPR))
7183 mult_num = rank_ops_for_fma (&ops);
7186 /* Only rewrite the expression tree into parallel form in the
7187 last reassoc pass, to avoid useless back-and-forth work
7188 with the initial linearization. */
7189 bool has_fma = mult_num >= 2 && mult_num != ops_num;
7190 if (!reassoc_insert_powi_p
7191 && ops.length () > 3
7192 && (width = get_reassociation_width (&ops, mult_num, lhs,
7193 rhs_code, mode))
7194 > 1)
7196 if (dump_file && (dump_flags & TDF_DETAILS))
7197 fprintf (dump_file,
7198 "Width = %d was chosen for reassociation\n",
7199 width);
7200 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
7201 width,
7202 has_fma,
7203 ops);
7205 else
7207 /* When there are three operands left, we want
7208 to make sure the ones that get the double
7209 binary op are chosen wisely. */
7210 int len = ops.length ();
7211 if (len >= 3
7212 && (!has_fma
7213 /* width > 1 means ranking ops results in better
7214 parallelism. Check current value to avoid
7215 calling get_reassociation_width again. */
7216 || (width != 1
7217 && get_reassociation_width (
7218 &ops, mult_num, lhs, rhs_code, mode)
7219 > 1)))
7220 swap_ops_for_binary_stmt (ops, len - 3);
7222 new_lhs = rewrite_expr_tree (stmt, rhs_code, 0, ops,
7223 powi_result != NULL
7224 || negate_result,
7225 len != orig_len);
7228 /* If we combined some repeated factors into a
7229 __builtin_powi call, multiply that result by the
7230 reassociated operands. */
7231 if (powi_result)
7233 gimple *mul_stmt, *lhs_stmt = SSA_NAME_DEF_STMT (lhs);
7234 tree type = TREE_TYPE (lhs);
7235 tree target_ssa = make_temp_ssa_name (type, NULL,
7236 "reassocpow");
7237 gimple_set_lhs (lhs_stmt, target_ssa);
7238 update_stmt (lhs_stmt);
7239 if (lhs != new_lhs)
7241 target_ssa = new_lhs;
7242 new_lhs = lhs;
7244 mul_stmt = gimple_build_assign (lhs, MULT_EXPR,
7245 powi_result, target_ssa);
7246 gimple_set_location (mul_stmt, gimple_location (stmt));
7247 gimple_set_uid (mul_stmt, gimple_uid (stmt));
7248 gsi_insert_after (&gsi, mul_stmt, GSI_NEW_STMT);
7252 if (negate_result)
7254 stmt = SSA_NAME_DEF_STMT (lhs);
7255 tree tmp = make_ssa_name (TREE_TYPE (lhs));
7256 gimple_set_lhs (stmt, tmp);
7257 if (lhs != new_lhs)
7258 tmp = new_lhs;
7259 gassign *neg_stmt = gimple_build_assign (lhs, NEGATE_EXPR,
7260 tmp);
7261 gimple_set_uid (neg_stmt, gimple_uid (stmt));
7262 gsi_insert_after (&gsi, neg_stmt, GSI_NEW_STMT);
7263 update_stmt (stmt);
7269 return cfg_cleanup_needed;
7272 /* Add jumps around shifts for range tests turned into bit tests.
7273 For each SSA_NAME VAR we have code like:
7274 VAR = ...; // final stmt of range comparison
7275 // bit test here...;
7276 OTHERVAR = ...; // final stmt of the bit test sequence
7277 RES = VAR | OTHERVAR;
7278 Turn the above into:
7279 VAR = ...;
7280 if (VAR != 0)
7281 goto <l3>;
7282 else
7283 goto <l2>;
7284 <l2>:
7285 // bit test here...;
7286 OTHERVAR = ...;
7287 <l3>:
7288 # RES = PHI<1(l1), OTHERVAR(l2)>; */
7290 static void
7291 branch_fixup (void)
7293 tree var;
7294 unsigned int i;
7296 FOR_EACH_VEC_ELT (reassoc_branch_fixups, i, var)
7298 gimple *def_stmt = SSA_NAME_DEF_STMT (var);
7299 gimple *use_stmt;
7300 use_operand_p use;
7301 bool ok = single_imm_use (var, &use, &use_stmt);
7302 gcc_assert (ok
7303 && is_gimple_assign (use_stmt)
7304 && gimple_assign_rhs_code (use_stmt) == BIT_IOR_EXPR
7305 && gimple_bb (def_stmt) == gimple_bb (use_stmt));
7307 basic_block cond_bb = gimple_bb (def_stmt);
7308 basic_block then_bb = split_block (cond_bb, def_stmt)->dest;
7309 basic_block merge_bb = split_block (then_bb, use_stmt)->dest;
7311 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
7312 gimple *g = gimple_build_cond (NE_EXPR, var,
7313 build_zero_cst (TREE_TYPE (var)),
7314 NULL_TREE, NULL_TREE);
7315 location_t loc = gimple_location (use_stmt);
7316 gimple_set_location (g, loc);
7317 gsi_insert_after (&gsi, g, GSI_NEW_STMT);
7319 edge etrue = make_edge (cond_bb, merge_bb, EDGE_TRUE_VALUE);
7320 etrue->probability = profile_probability::even ();
7321 edge efalse = find_edge (cond_bb, then_bb);
7322 efalse->flags = EDGE_FALSE_VALUE;
7323 efalse->probability -= etrue->probability;
7324 then_bb->count -= etrue->count ();
7326 tree othervar = NULL_TREE;
7327 if (gimple_assign_rhs1 (use_stmt) == var)
7328 othervar = gimple_assign_rhs2 (use_stmt);
7329 else if (gimple_assign_rhs2 (use_stmt) == var)
7330 othervar = gimple_assign_rhs1 (use_stmt);
7331 else
7332 gcc_unreachable ();
7333 tree lhs = gimple_assign_lhs (use_stmt);
7334 gphi *phi = create_phi_node (lhs, merge_bb);
7335 add_phi_arg (phi, build_one_cst (TREE_TYPE (lhs)), etrue, loc);
7336 add_phi_arg (phi, othervar, single_succ_edge (then_bb), loc);
7337 gsi = gsi_for_stmt (use_stmt);
7338 gsi_remove (&gsi, true);
7340 set_immediate_dominator (CDI_DOMINATORS, merge_bb, cond_bb);
7341 set_immediate_dominator (CDI_POST_DOMINATORS, cond_bb, merge_bb);
7343 reassoc_branch_fixups.release ();
7346 void dump_ops_vector (FILE *file, vec<operand_entry *> ops);
7347 void debug_ops_vector (vec<operand_entry *> ops);
7349 /* Dump the operand entry vector OPS to FILE. */
7351 void
7352 dump_ops_vector (FILE *file, vec<operand_entry *> ops)
7354 operand_entry *oe;
7355 unsigned int i;
7357 FOR_EACH_VEC_ELT (ops, i, oe)
7359 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
7360 print_generic_expr (file, oe->op);
7361 fprintf (file, "\n");
7365 /* Dump the operand entry vector OPS to STDERR. */
7367 DEBUG_FUNCTION void
7368 debug_ops_vector (vec<operand_entry *> ops)
7370 dump_ops_vector (stderr, ops);
7373 /* Bubble up return status from reassociate_bb. */
7375 static bool
7376 do_reassoc ()
7378 bool cfg_cleanup_needed = false;
7379 basic_block *worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
7381 unsigned sp = 0;
7382 for (auto son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
7383 son; son = next_dom_son (CDI_DOMINATORS, son))
7384 worklist[sp++] = son;
7385 while (sp)
7387 basic_block bb = worklist[--sp];
7388 break_up_subtract_bb (bb);
7389 for (auto son = first_dom_son (CDI_DOMINATORS, bb);
7390 son; son = next_dom_son (CDI_DOMINATORS, son))
7391 worklist[sp++] = son;
7394 for (auto son = first_dom_son (CDI_POST_DOMINATORS,
7395 EXIT_BLOCK_PTR_FOR_FN (cfun));
7396 son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7397 worklist[sp++] = son;
7398 while (sp)
7400 basic_block bb = worklist[--sp];
7401 cfg_cleanup_needed |= reassociate_bb (bb);
7402 for (auto son = first_dom_son (CDI_POST_DOMINATORS, bb);
7403 son; son = next_dom_son (CDI_POST_DOMINATORS, son))
7404 worklist[sp++] = son;
7407 free (worklist);
7408 return cfg_cleanup_needed;
7411 /* Initialize the reassociation pass. */
7413 static void
7414 init_reassoc (void)
7416 int i;
7417 int64_t rank = 2;
7418 int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS);
7420 /* Find the loops, so that we can prevent moving calculations in
7421 them. */
7422 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
7424 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
7426 next_operand_entry_id = 0;
7428 /* Reverse RPO (Reverse Post Order) will give us something where
7429 deeper loops come later. */
7430 pre_and_rev_post_order_compute (NULL, bbs, false);
7431 bb_rank = XCNEWVEC (int64_t, last_basic_block_for_fn (cfun));
7432 operand_rank = new hash_map<tree, int64_t>;
7434 /* Give each default definition a distinct rank. This includes
7435 parameters and the static chain. Walk backwards over all
7436 SSA names so that we get proper rank ordering according
7437 to tree_swap_operands_p. */
7438 for (i = num_ssa_names - 1; i > 0; --i)
7440 tree name = ssa_name (i);
7441 if (name && SSA_NAME_IS_DEFAULT_DEF (name))
7442 insert_operand_rank (name, ++rank);
7445 /* Set up rank for each BB */
7446 for (i = 0; i < n_basic_blocks_for_fn (cfun) - NUM_FIXED_BLOCKS; i++)
7447 bb_rank[bbs[i]] = ++rank << 16;
7449 free (bbs);
7450 calculate_dominance_info (CDI_POST_DOMINATORS);
7451 plus_negates = vNULL;
7452 mark_ssa_maybe_undefs ();
7455 /* Cleanup after the reassociation pass, and print stats if
7456 requested. */
7458 static void
7459 fini_reassoc (void)
7461 statistics_counter_event (cfun, "Linearized",
7462 reassociate_stats.linearized);
7463 statistics_counter_event (cfun, "Constants eliminated",
7464 reassociate_stats.constants_eliminated);
7465 statistics_counter_event (cfun, "Ops eliminated",
7466 reassociate_stats.ops_eliminated);
7467 statistics_counter_event (cfun, "Statements rewritten",
7468 reassociate_stats.rewritten);
7469 statistics_counter_event (cfun, "Built-in pow[i] calls encountered",
7470 reassociate_stats.pows_encountered);
7471 statistics_counter_event (cfun, "Built-in powi calls created",
7472 reassociate_stats.pows_created);
7474 delete operand_rank;
7475 bitmap_clear (biased_names);
7476 operand_entry_pool.release ();
7477 free (bb_rank);
7478 plus_negates.release ();
7479 free_dominance_info (CDI_POST_DOMINATORS);
7480 loop_optimizer_finalize ();
7483 /* Gate and execute functions for Reassociation. If INSERT_POWI_P, enable
7484 insertion of __builtin_powi calls.
7486 Returns TODO_cfg_cleanup if a CFG cleanup pass is desired due to
7487 optimization of a gimple conditional. Otherwise returns zero. */
7489 static unsigned int
7490 execute_reassoc (bool insert_powi_p, bool bias_loop_carried_phi_ranks_p)
7492 reassoc_insert_powi_p = insert_powi_p;
7493 reassoc_bias_loop_carried_phi_ranks_p = bias_loop_carried_phi_ranks_p;
7495 init_reassoc ();
7497 bool cfg_cleanup_needed;
7498 cfg_cleanup_needed = do_reassoc ();
7499 repropagate_negates ();
7500 branch_fixup ();
7502 fini_reassoc ();
7503 return cfg_cleanup_needed ? TODO_cleanup_cfg : 0;
7506 namespace {
7508 const pass_data pass_data_reassoc =
7510 GIMPLE_PASS, /* type */
7511 "reassoc", /* name */
7512 OPTGROUP_NONE, /* optinfo_flags */
7513 TV_TREE_REASSOC, /* tv_id */
7514 ( PROP_cfg | PROP_ssa ), /* properties_required */
7515 0, /* properties_provided */
7516 0, /* properties_destroyed */
7517 0, /* todo_flags_start */
7518 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
7521 class pass_reassoc : public gimple_opt_pass
7523 public:
7524 pass_reassoc (gcc::context *ctxt)
7525 : gimple_opt_pass (pass_data_reassoc, ctxt), insert_powi_p (false)
7528 /* opt_pass methods: */
7529 opt_pass * clone () final override { return new pass_reassoc (m_ctxt); }
7530 void set_pass_param (unsigned int n, bool param) final override
7532 gcc_assert (n == 0);
7533 insert_powi_p = param;
7534 bias_loop_carried_phi_ranks_p = !param;
7536 bool gate (function *) final override { return flag_tree_reassoc != 0; }
7537 unsigned int execute (function *) final override
7539 return execute_reassoc (insert_powi_p, bias_loop_carried_phi_ranks_p);
7542 private:
7543 /* Enable insertion of __builtin_powi calls during execute_reassoc. See
7544 point 3a in the pass header comment. */
7545 bool insert_powi_p;
7546 bool bias_loop_carried_phi_ranks_p;
7547 }; // class pass_reassoc
7549 } // anon namespace
7551 gimple_opt_pass *
7552 make_pass_reassoc (gcc::context *ctxt)
7554 return new pass_reassoc (ctxt);