/* Full and partial redundancy elimination and code hoisting on SSA GIMPLE.
   Copyright (C) 2001-2025 Free Software Foundation, Inc.
   Contributed by Daniel Berlin <dan@dberlin.org> and Steven Bosscher
   <stevenb@suse.de>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-ssa-sccvn.h"
#include "tree-scalar-evolution.h"
#include "dbgcnt.h"
#include "domwalk.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-dce.h"
#include "tree-cfgcleanup.h"
#include "alias.h"
#include "gimple-range.h"
/* Even though this file is called tree-ssa-pre.cc, we actually
   implement a bit more than just PRE here.  All of them piggy-back
   on GVN which is implemented in tree-ssa-sccvn.cc.

     1. Full Redundancy Elimination (FRE)
	This is the elimination phase of GVN.

     2. Partial Redundancy Elimination (PRE)
	This adds computation of AVAIL_OUT and ANTIC_IN and does
	expression insertion to form GVN-PRE.

     3. Code hoisting
	This optimization uses the ANTIC_IN sets computed for PRE
	to move expressions further up than PRE would do, to make
	multiple computations of the same value fully redundant.
	This pass is explained below (after the explanation of the
	basic algorithm for PRE).
*/
/* TODO:

   1. Avail sets can be shared by making an avail_find_leader that
      walks up the dominator tree and looks in those avail sets.
      This might affect code optimality, it's unclear right now.
      Currently the AVAIL_OUT sets are the remaining source of
      quadratic memory use in GVN-PRE.
   2. Strength reduction can be performed by anticipating expressions
      we can repair later on.
   3. We can do back-substitution or smarter value numbering to catch
      commutative expressions split up over multiple statements.
*/

/* For ease of terminology, "expression node" in the below refers to
   every expression node but GIMPLE_ASSIGN, because GIMPLE_ASSIGNs
   represent the actual statement containing the expressions we care about,
   and we cache the value number by putting it in the expression.  */
/* Basic algorithm for Partial Redundancy Elimination:

   First we walk the statements to generate the AVAIL sets, the
   EXP_GEN sets, and the tmp_gen sets.  EXP_GEN sets represent the
   generation of values/expressions by a given block.  We use them
   when computing the ANTIC sets.  The AVAIL sets consist of
   SSA_NAME's that represent values, so we know what values are
   available in what blocks.  AVAIL is a forward dataflow problem.  In
   SSA, values are never killed, so we don't need a kill set, or a
   fixpoint iteration, in order to calculate the AVAIL sets.  In
   traditional parlance, AVAIL sets tell us the downsafety of the
   expressions/values.

   Next, we generate the ANTIC sets.  These sets represent the
   anticipatable expressions.  ANTIC is a backwards dataflow
   problem.  An expression is anticipatable in a given block if it could
   be generated in that block.  This means that if we had to perform
   an insertion in that block, of the value of that expression, we
   could.  Calculating the ANTIC sets requires phi translation of
   expressions, because the flow goes backwards through phis.  We must
   iterate to a fixpoint of the ANTIC sets, because we have a kill
   set.  Even in SSA form, values are not live over the entire
   function, only from their definition point onwards.  So we have to
   remove values from the ANTIC set once we go past the definition
   point of the leaders that make them up.
   compute_antic/compute_antic_aux performs this computation.

   Third, we perform insertions to make partially redundant
   expressions fully redundant.

   An expression is partially redundant (excluding partial
   anticipation) if:

   1. It is AVAIL in some, but not all, of the predecessors of a
      given block.
   2. It is ANTIC in all the predecessors.

   In order to make it fully redundant, we insert the expression into
   the predecessors where it is not available, but is ANTIC.

   When optimizing for size, we only eliminate the partial redundancy
   if we need to insert in only one predecessor.  This avoids almost
   completely the code size increase that PRE usually causes.

   For the partial anticipation case, we only perform insertion if it
   is partially anticipated in some block, and fully available in all
   of the predecessors.

   do_pre_regular_insertion/do_pre_partial_partial_insertion
   performs these steps, driven by insert/insert_aux.

   Fourth, we eliminate fully redundant expressions.
   This is a simple statement walk that replaces redundant
   calculations with the now available values.  */
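
/* As a sketch of the above (the basic block numbers and SSA names here
   are invented for illustration, not taken from any particular dump):

     <bb 3>:  x_2 = a_1 + b_1;	a_1 + b_1 is AVAIL out of bb 3
     <bb 4>:  			empty; a_1 + b_1 is ANTIC in bb 4
     <bb 5>:  y_3 = a_1 + b_1;	bb 5 joins bb 3 and bb 4

   The expression a_1 + b_1 is partially redundant in bb 5: AVAIL on
   the edge from bb 3, ANTIC in bb 4.  Insertion adds
   x_4 = a_1 + b_1 to bb 4 and a PHI x_5 = PHI <x_2(3), x_4(4)> to
   bb 5, after which elimination rewrites the now fully redundant
   statement in bb 5 to y_3 = x_5.  */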
/* Basic algorithm for Code Hoisting:

   Code hoisting is: Moving value computations up in the control flow
   graph to make multiple copies redundant.  Typically this is a size
   optimization, but there are cases where it also is helpful for speed.

   A simple code hoisting algorithm is implemented that piggy-backs on
   the PRE infrastructure.  For code hoisting, we have to know ANTIC_OUT
   which is effectively ANTIC_IN - AVAIL_OUT.  The latter two have to be
   computed for PRE, and we can use them to perform a limited version of
   code hoisting, too.

   For the purpose of this implementation, a value is hoistable to a basic
   block B if the following properties are met:

   1. The value is in ANTIC_IN(B) -- the value will be computed on all
      paths from B to function exit and it can be computed in B;

   2. The value is not in AVAIL_OUT(B) -- there would be no need to
      compute the value again and make it available twice;

   3. All successors of B are dominated by B -- makes sure that inserting
      a computation of the value in B will make the remaining
      computations fully redundant;

   4. At least one successor has the value in AVAIL_OUT -- to avoid
      hoisting values up too far;

   5. There are at least two successors of B -- hoisting in straight
      line code is pointless.

   The third condition is not strictly necessary, but it would complicate
   the hoisting pass a lot.  In fact, I don't know of any code hoisting
   algorithm that does not have this requirement.  Fortunately, experiments
   have shown that most candidate hoistable values are in regions that meet
   this condition (e.g. diamond-shape regions).

   The fourth condition is necessary to avoid hoisting things up too far
   away from the uses of the value.  Nothing else limits the algorithm
   from hoisting everything up as far as ANTIC_IN allows.  Experiments
   with SPEC and CSiBE have shown that hoisting up too far results in more
   spilling, less benefits for code size, and worse benchmark scores.
   Fortunately, in practice most of the interesting hoisting opportunities
   are caught despite this limitation.

   For hoistable values that meet all conditions, expressions are inserted
   to make the calculation of the hoistable value fully redundant.  We
   perform code hoisting insertions after each round of PRE insertions,
   because code hoisting never exposes new PRE opportunities, but PRE can
   create new code hoisting opportunities.

   The code hoisting algorithm is implemented in do_hoist_insert, driven
   by insert/insert_aux.  */
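
/* As a sketch (again with invented block numbers and SSA names),
   consider a diamond:

     <bb 2>:  if (cond_1) ...	branches to bb 3 and bb 4
     <bb 3>:  x_2 = a_1 + b_1;
     <bb 4>:  y_3 = a_1 + b_1;

   a_1 + b_1 is in ANTIC_IN (bb 2) but not in AVAIL_OUT (bb 2), bb 2
   dominates both successors, at least one successor has the value in
   AVAIL_OUT, and bb 2 has two successors, so all five conditions are
   met.  Hoisting inserts t_4 = a_1 + b_1 into bb 2, after which the
   computations in bb 3 and bb 4 are fully redundant and elimination
   replaces them with uses of t_4.  */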
/* Representations of value numbers:

   Value numbers are represented by a representative SSA_NAME.  We
   will create fake SSA_NAME's in situations where we need a
   representative but do not have one (because it is a complex
   expression).  In order to facilitate storing the value numbers in
   bitmaps, and keep the number of wasted SSA_NAME's down, we also
   associate a value_id with each value number, and create full blown
   ssa_name's only where we actually need them (IE in operands of
   existing expressions).

   Theoretically you could replace all the value_id's with
   SSA_NAME_VERSION, but this would allocate a large number of
   SSA_NAME's (which are each > 30 bytes) just to get a 4 byte number.
   It would also require an additional indirection at each point we
   use the value id.  */
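
/* For example (an illustration with invented ids): if t_1 = b_2 + c_3
   and a phi-translated expression b_2 + c_3 denote the same value,
   both share one value_id, say 7, with t_1 as the representative
   SSA_NAME; a fake SSA_NAME for value 7 is only created if no real
   one exists, see get_representative_for below.  */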
/* Representation of expressions on value numbers:

   Expressions consisting of value numbers are represented the same
   way as our VN internally represents them, with an additional
   "pre_expr" wrapping around them in order to facilitate storing all
   of the expressions in the same sets.  */

/* Representation of sets:

   The dataflow sets do not need to be sorted in any particular order
   for the majority of their lifetime, and are simply represented as two
   bitmaps, one that keeps track of values present in the set, and one
   that keeps track of expressions present in the set.

   When we need them in topological order, we produce it on demand by
   transforming the bitmap into an array and sorting it into topo
   order.  */
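
/* For example (ids invented for illustration): a set holding
   a_1 + b_2 (expression id 3, value id 7) and c_4 (expression id 5,
   also value id 7) is stored as expressions = {3, 5} and
   values = {7} -- two expressions, one value.  */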
/* Type of expression, used to know which member of the PRE_EXPR union
   is valid.  */

enum pre_expr_kind
{
    NAME,
    NARY,
    REFERENCE,
    CONSTANT
};

union pre_expr_union
{
  tree name;
  tree constant;
  vn_nary_op_t nary;
  vn_reference_t reference;
};

typedef struct pre_expr_d : nofree_ptr_hash <pre_expr_d>
{
  enum pre_expr_kind kind;
  unsigned int id;
  unsigned value_id;
  location_t loc;
  pre_expr_union u;

  /* hash_table support.  */
  static inline hashval_t hash (const pre_expr_d *);
  static inline int equal (const pre_expr_d *, const pre_expr_d *);
} *pre_expr;

#define PRE_EXPR_NAME(e) (e)->u.name
#define PRE_EXPR_NARY(e) (e)->u.nary
#define PRE_EXPR_REFERENCE(e) (e)->u.reference
#define PRE_EXPR_CONSTANT(e) (e)->u.constant
/* Compare E1 and E2 for equality.  */

inline int
pre_expr_d::equal (const pre_expr_d *e1, const pre_expr_d *e2)
{
  if (e1->kind != e2->kind)
    return false;

  switch (e1->kind)
    {
    case CONSTANT:
      return vn_constant_eq_with_type (PRE_EXPR_CONSTANT (e1),
				       PRE_EXPR_CONSTANT (e2));
    case NAME:
      return PRE_EXPR_NAME (e1) == PRE_EXPR_NAME (e2);
    case NARY:
      return vn_nary_op_eq (PRE_EXPR_NARY (e1), PRE_EXPR_NARY (e2));
    case REFERENCE:
      return vn_reference_eq (PRE_EXPR_REFERENCE (e1),
			      PRE_EXPR_REFERENCE (e2));
    default:
      gcc_unreachable ();
    }
}
/* Hash E.  */

inline hashval_t
pre_expr_d::hash (const pre_expr_d *e)
{
  switch (e->kind)
    {
    case CONSTANT:
      return vn_hash_constant_with_type (PRE_EXPR_CONSTANT (e));
    case NAME:
      return SSA_NAME_VERSION (PRE_EXPR_NAME (e));
    case NARY:
      return PRE_EXPR_NARY (e)->hashcode;
    case REFERENCE:
      return PRE_EXPR_REFERENCE (e)->hashcode;
    default:
      gcc_unreachable ();
    }
}

/* Next global expression id number.  */
static unsigned int next_expression_id;

/* Mapping from expression to id number we can use in bitmap sets.  */
static vec<pre_expr> expressions;
static hash_table<pre_expr_d> *expression_to_id;
static vec<unsigned> name_to_id;
static obstack pre_expr_obstack;
/* Allocate an expression id for EXPR.  */

static inline unsigned int
alloc_expression_id (pre_expr expr)
{
  struct pre_expr_d **slot;
  /* Make sure we won't overflow.  */
  gcc_assert (next_expression_id + 1 > next_expression_id);
  expr->id = next_expression_id++;
  expressions.safe_push (expr);
  if (expr->kind == NAME)
    {
      unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
      /* vec::safe_grow_cleared allocates no headroom.  Avoid frequent
	 re-allocations by using vec::reserve upfront.  */
      unsigned old_len = name_to_id.length ();
      name_to_id.reserve (num_ssa_names - old_len);
      name_to_id.quick_grow_cleared (num_ssa_names);
      gcc_assert (name_to_id[version] == 0);
      name_to_id[version] = expr->id;
    }
  else
    {
      slot = expression_to_id->find_slot (expr, INSERT);
      gcc_assert (!*slot);
      *slot = expr;
    }
  return next_expression_id - 1;
}

/* Return the expression id for tree EXPR.  */

static inline unsigned int
get_expression_id (const pre_expr expr)
{
  return expr->id;
}

static inline unsigned int
lookup_expression_id (const pre_expr expr)
{
  struct pre_expr_d **slot;

  if (expr->kind == NAME)
    {
      unsigned version = SSA_NAME_VERSION (PRE_EXPR_NAME (expr));
      if (name_to_id.length () <= version)
	return 0;
      return name_to_id[version];
    }
  else
    {
      slot = expression_to_id->find_slot (expr, NO_INSERT);
      if (!slot)
	return 0;
      return ((pre_expr)*slot)->id;
    }
}

/* Return the expression that has expression id ID.  */

static inline pre_expr
expression_for_id (unsigned int id)
{
  return expressions[id];
}
static object_allocator<pre_expr_d> pre_expr_pool ("pre_expr nodes");

/* Given an SSA_NAME NAME, get or create a pre_expr to represent it.  */

static pre_expr
get_or_alloc_expr_for_name (tree name)
{
  struct pre_expr_d expr;
  pre_expr result;
  unsigned int result_id;

  expr.kind = NAME;
  expr.id = 0;
  PRE_EXPR_NAME (&expr) = name;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  result = pre_expr_pool.allocate ();
  result->kind = NAME;
  result->loc = UNKNOWN_LOCATION;
  result->value_id = VN_INFO (name)->value_id;
  PRE_EXPR_NAME (result) = name;
  alloc_expression_id (result);
  return result;
}
/* Given a NARY, get or create a pre_expr to represent it.  Assign
   VALUE_ID to it or allocate a new value-id if it is zero.  Record
   LOC as the original location of the expression.  */

static pre_expr
get_or_alloc_expr_for_nary (vn_nary_op_t nary, unsigned value_id,
			    location_t loc = UNKNOWN_LOCATION)
{
  struct pre_expr_d expr;
  pre_expr result;
  unsigned int result_id;

  gcc_assert (value_id == 0 || !value_id_constant_p (value_id));

  expr.kind = NARY;
  expr.id = 0;
  nary->hashcode = vn_nary_op_compute_hash (nary);
  PRE_EXPR_NARY (&expr) = nary;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  result = pre_expr_pool.allocate ();
  result->kind = NARY;
  result->loc = loc;
  result->value_id = value_id ? value_id : get_next_value_id ();
  PRE_EXPR_NARY (result)
    = alloc_vn_nary_op_noinit (nary->length, &pre_expr_obstack);
  memcpy (PRE_EXPR_NARY (result), nary, sizeof_vn_nary_op (nary->length));
  alloc_expression_id (result);
  return result;
}

/* Given a REFERENCE, get or create a pre_expr to represent it.  */

static pre_expr
get_or_alloc_expr_for_reference (vn_reference_t reference,
				 location_t loc = UNKNOWN_LOCATION)
{
  struct pre_expr_d expr;
  pre_expr result;
  unsigned int result_id;

  expr.kind = REFERENCE;
  expr.id = 0;
  PRE_EXPR_REFERENCE (&expr) = reference;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  result = pre_expr_pool.allocate ();
  result->kind = REFERENCE;
  result->loc = loc;
  result->value_id = reference->value_id;
  PRE_EXPR_REFERENCE (result) = reference;
  alloc_expression_id (result);
  return result;
}
/* An unordered bitmap set.  One bitmap tracks values, the other,
   expressions.  */
typedef class bitmap_set
{
public:
  bitmap_head expressions;
  bitmap_head values;
} *bitmap_set_t;

#define FOR_EACH_EXPR_ID_IN_SET(set, id, bi)		\
  EXECUTE_IF_SET_IN_BITMAP (&(set)->expressions, 0, (id), (bi))

#define FOR_EACH_VALUE_ID_IN_SET(set, id, bi)		\
  EXECUTE_IF_SET_IN_BITMAP (&(set)->values, 0, (id), (bi))

/* Mapping from value id to expressions with that value_id.  */
static vec<bitmap> value_expressions;
/* We just record a single expression for each constant value,
   one of kind CONSTANT.  */
static vec<pre_expr> constant_value_expressions;

/* This structure is used to keep track of statistics on what
   optimization PRE was able to perform.  */
static struct
{
  /* The number of new expressions/temporaries generated by PRE.  */
  int insertions;

  /* The number of inserts found due to partial anticipation.  */
  int pa_insert;

  /* The number of inserts made for code hoisting.  */
  int hoist_insert;

  /* The number of new PHI nodes added by PRE.  */
  int phis;
} pre_stats;
static bool do_partial_partial;
static pre_expr bitmap_find_leader (bitmap_set_t, unsigned int);
static void bitmap_value_insert_into_set (bitmap_set_t, pre_expr);
static bool bitmap_value_replace_in_set (bitmap_set_t, pre_expr);
static void bitmap_set_copy (bitmap_set_t, bitmap_set_t);
static bool bitmap_set_contains_value (bitmap_set_t, unsigned int);
static void bitmap_insert_into_set (bitmap_set_t, pre_expr);
static bitmap_set_t bitmap_set_new (void);
static tree create_expression_by_pieces (basic_block, pre_expr, gimple_seq *,
					 tree);
static tree find_or_generate_expression (basic_block, tree, gimple_seq *);
static unsigned int get_expr_value_id (pre_expr);

/* We can add and remove elements and entries to and from sets
   and hash tables, so we use alloc pools for them.  */

static object_allocator<bitmap_set> bitmap_set_pool ("Bitmap sets");
static bitmap_obstack grand_bitmap_obstack;
/* A three tuple {e, pred, v} used to cache phi translations in the
   phi_translate_table.  */

typedef struct expr_pred_trans_d : public typed_noop_remove <expr_pred_trans_d>
{
  typedef expr_pred_trans_d value_type;
  typedef expr_pred_trans_d compare_type;

  /* The expression ID.  */
  unsigned e;

  /* The value expression ID that resulted from the translation.  */
  unsigned v;

  /* hash_table support.  */
  static inline void mark_empty (expr_pred_trans_d &);
  static inline bool is_empty (const expr_pred_trans_d &);
  static inline void mark_deleted (expr_pred_trans_d &);
  static inline bool is_deleted (const expr_pred_trans_d &);
  static const bool empty_zero_p = true;
  static inline hashval_t hash (const expr_pred_trans_d &);
  static inline int equal (const expr_pred_trans_d &, const expr_pred_trans_d &);
} *expr_pred_trans_t;
typedef const struct expr_pred_trans_d *const_expr_pred_trans_t;

inline bool
expr_pred_trans_d::is_empty (const expr_pred_trans_d &e)
{
  return e.e == 0;
}

inline bool
expr_pred_trans_d::is_deleted (const expr_pred_trans_d &e)
{
  return e.e == -1u;
}

inline void
expr_pred_trans_d::mark_empty (expr_pred_trans_d &e)
{
  e.e = 0;
}

inline void
expr_pred_trans_d::mark_deleted (expr_pred_trans_d &e)
{
  e.e = -1u;
}

inline hashval_t
expr_pred_trans_d::hash (const expr_pred_trans_d &e)
{
  return e.e;
}

inline int
expr_pred_trans_d::equal (const expr_pred_trans_d &ve1,
			  const expr_pred_trans_d &ve2)
{
  return ve1.e == ve2.e;
}
/* Sets that we need to keep track of.  */
typedef struct bb_bitmap_sets
{
  /* The EXP_GEN set, which represents expressions/values generated in
     a basic block.  */
  bitmap_set_t exp_gen;

  /* The PHI_GEN set, which represents PHI results generated in a
     basic block.  */
  bitmap_set_t phi_gen;

  /* The TMP_GEN set, which represents results/temporaries generated
     in a basic block.  IE the LHS of an expression.  */
  bitmap_set_t tmp_gen;

  /* The AVAIL_OUT set, which represents which values are available in
     a given basic block.  */
  bitmap_set_t avail_out;

  /* The ANTIC_IN set, which represents which values are anticipatable
     in a given basic block.  */
  bitmap_set_t antic_in;

  /* The PA_IN set, which represents which values are
     partially anticipatable in a given basic block.  */
  bitmap_set_t pa_in;

  /* The NEW_SETS set, which is used during insertion to augment the
     AVAIL_OUT set of blocks with the new insertions performed during
     the current iteration.  */
  bitmap_set_t new_sets;

  /* A cache for value_dies_in_block_x.  */
  bitmap expr_dies;

  /* The live virtual operand on successor edges.  */
  tree vop_on_exit;

  /* PHI translate cache for the single successor edge.  */
  hash_table<expr_pred_trans_d> *phi_translate_table;

  /* True if we have visited this block during ANTIC calculation.  */
  unsigned int visited : 1;

  /* True when the block contains a call that might not return.  */
  unsigned int contains_may_not_return_call : 1;
} *bb_value_sets_t;

#define EXP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->exp_gen
#define PHI_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->phi_gen
#define TMP_GEN(BB) ((bb_value_sets_t) ((BB)->aux))->tmp_gen
#define AVAIL_OUT(BB) ((bb_value_sets_t) ((BB)->aux))->avail_out
#define ANTIC_IN(BB) ((bb_value_sets_t) ((BB)->aux))->antic_in
#define PA_IN(BB) ((bb_value_sets_t) ((BB)->aux))->pa_in
#define NEW_SETS(BB) ((bb_value_sets_t) ((BB)->aux))->new_sets
#define EXPR_DIES(BB) ((bb_value_sets_t) ((BB)->aux))->expr_dies
#define PHI_TRANS_TABLE(BB) ((bb_value_sets_t) ((BB)->aux))->phi_translate_table
#define BB_VISITED(BB) ((bb_value_sets_t) ((BB)->aux))->visited
#define BB_MAY_NOTRETURN(BB) ((bb_value_sets_t) ((BB)->aux))->contains_may_not_return_call
#define BB_LIVE_VOP_ON_EXIT(BB) ((bb_value_sets_t) ((BB)->aux))->vop_on_exit
/* Add the tuple mapping from {expression E, basic block PRED} to
   the phi translation table and return whether it pre-existed.  */

static inline bool
phi_trans_add (expr_pred_trans_t *entry, pre_expr e, basic_block pred)
{
  if (!PHI_TRANS_TABLE (pred))
    PHI_TRANS_TABLE (pred) = new hash_table<expr_pred_trans_d> (11);

  expr_pred_trans_t slot;
  expr_pred_trans_d tem;
  unsigned id = get_expression_id (e);
  tem.e = id;
  slot = PHI_TRANS_TABLE (pred)->find_slot_with_hash (tem, id, INSERT);
  if (slot->e)
    {
      *entry = slot;
      return true;
    }

  *entry = slot;
  slot->e = id;
  return false;
}

/* Add expression E to the expression set of value id V.  */

static void
add_to_value (unsigned int v, pre_expr e)
{
  gcc_checking_assert (get_expr_value_id (e) == v);

  if (value_id_constant_p (v))
    {
      if (e->kind != CONSTANT)
	return;

      if (-v >= constant_value_expressions.length ())
	constant_value_expressions.safe_grow_cleared (-v + 1);

      pre_expr leader = constant_value_expressions[-v];
      if (!leader)
	constant_value_expressions[-v] = e;
    }
  else
    {
      if (v >= value_expressions.length ())
	value_expressions.safe_grow_cleared (v + 1);

      bitmap set = value_expressions[v];
      if (!set)
	{
	  set = BITMAP_ALLOC (&grand_bitmap_obstack);
	  value_expressions[v] = set;
	}
      bitmap_set_bit (set, get_expression_id (e));
    }
}
/* Create a new bitmap set and return it.  */

static bitmap_set_t
bitmap_set_new (void)
{
  bitmap_set_t ret = bitmap_set_pool.allocate ();
  bitmap_initialize (&ret->expressions, &grand_bitmap_obstack);
  bitmap_initialize (&ret->values, &grand_bitmap_obstack);
  return ret;
}

/* Return the value id for a PRE expression EXPR.  */

static unsigned int
get_expr_value_id (pre_expr expr)
{
  /* ??? We cannot assert that expr has a value-id (it can be 0), because
     we assign value-ids only to expressions that have a result
     in set_hashtable_value_ids.  */
  return expr->value_id;
}

/* Return a VN valnum (SSA name or constant) for the PRE value-id VAL.  */

static tree
vn_valnum_from_value_id (unsigned int val)
{
  if (value_id_constant_p (val))
    {
      pre_expr vexpr = constant_value_expressions[-val];
      if (vexpr)
	return PRE_EXPR_CONSTANT (vexpr);
      return NULL_TREE;
    }

  bitmap exprset = value_expressions[val];
  bitmap_iterator bi;
  unsigned int i;
  EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
    {
      pre_expr vexpr = expression_for_id (i);
      if (vexpr->kind == NAME)
	return VN_INFO (PRE_EXPR_NAME (vexpr))->valnum;
    }
  return NULL_TREE;
}
/* Insert an expression EXPR into a bitmapped set.  */

static void
bitmap_insert_into_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);
  if (! value_id_constant_p (val))
    {
      /* Note this is the only function causing multiple expressions
	 for the same value to appear in a set.  This is needed for
	 TMP_GEN, PHI_GEN and NEW_SETs.  */
      bitmap_set_bit (&set->values, val);
      bitmap_set_bit (&set->expressions, get_expression_id (expr));
    }
}

/* Copy a bitmapped set ORIG, into bitmapped set DEST.  */

static void
bitmap_set_copy (bitmap_set_t dest, bitmap_set_t orig)
{
  bitmap_copy (&dest->expressions, &orig->expressions);
  bitmap_copy (&dest->values, &orig->values);
}

/* Free memory used up by SET.  */
static void
bitmap_set_free (bitmap_set_t set)
{
  bitmap_clear (&set->expressions);
  bitmap_clear (&set->values);
}
static void
pre_expr_DFS (pre_expr expr, bitmap_set_t set, bitmap val_visited,
	      vec<pre_expr> &post);

/* DFS walk leaders of VAL to their operands with leaders in SET, collecting
   expressions in SET in postorder into POST.  */

static void
pre_expr_DFS (unsigned val, bitmap_set_t set, bitmap val_visited,
	      vec<pre_expr> &post)
{
  unsigned int i;
  bitmap_iterator bi;

  /* Iterate over all leaders and DFS recurse.  Borrowed from
     bitmap_find_leader.  */
  bitmap exprset = value_expressions[val];
  if (!exprset->first->next)
    {
      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
	if (bitmap_bit_p (&set->expressions, i))
	  pre_expr_DFS (expression_for_id (i), set, val_visited, post);
      return;
    }

  EXECUTE_IF_AND_IN_BITMAP (exprset, &set->expressions, 0, i, bi)
    pre_expr_DFS (expression_for_id (i), set, val_visited, post);
}

/* DFS walk EXPR to its operands with leaders in SET, collecting
   expressions in SET in postorder into POST.  */

static void
pre_expr_DFS (pre_expr expr, bitmap_set_t set, bitmap val_visited,
	      vec<pre_expr> &post)
{
  switch (expr->kind)
    {
    case NARY:
      {
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	for (unsigned i = 0; i < nary->length; i++)
	  {
	    if (TREE_CODE (nary->op[i]) != SSA_NAME)
	      continue;
	    unsigned int op_val_id = VN_INFO (nary->op[i])->value_id;
	    /* Only recurse for values that have a leader in SET and
	       that we have not visited yet; checking the value bitmap
	       directly avoids the costly bitmap_find_leader.  */
	    if (bitmap_bit_p (&set->values, op_val_id)
		&& bitmap_set_bit (val_visited, op_val_id))
	      pre_expr_DFS (op_val_id, set, val_visited, post);
	  }
	break;
      }
    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	vec<vn_reference_op_s> operands = ref->operands;
	vn_reference_op_t operand;
	for (unsigned i = 0; operands.iterate (i, &operand); i++)
	  {
	    tree op[3];
	    op[0] = operand->op0;
	    op[1] = operand->op1;
	    op[2] = operand->op2;
	    for (unsigned n = 0; n < 3; ++n)
	      {
		if (!op[n] || TREE_CODE (op[n]) != SSA_NAME)
		  continue;
		unsigned op_val_id = VN_INFO (op[n])->value_id;
		if (bitmap_bit_p (&set->values, op_val_id)
		    && bitmap_set_bit (val_visited, op_val_id))
		  pre_expr_DFS (op_val_id, set, val_visited, post);
	      }
	  }
	break;
      }
    default:;
    }
  post.quick_push (expr);
}
/* Generate a topologically-ordered array of bitmap set SET.  */

static vec<pre_expr>
sorted_array_from_bitmap_set (bitmap_set_t set)
{
  unsigned int i;
  bitmap_iterator bi;
  vec<pre_expr> result;

  /* Pre-allocate enough space for the array.  */
  result.create (bitmap_count_bits (&set->expressions));

  auto_bitmap val_visited (&grand_bitmap_obstack);
  bitmap_tree_view (val_visited);
  FOR_EACH_VALUE_ID_IN_SET (set, i, bi)
    if (bitmap_set_bit (val_visited, i))
      pre_expr_DFS (i, set, val_visited, result);

  return result;
}
/* Subtract all expressions contained in ORIG from DEST.  */

static bitmap_set_t
bitmap_set_subtract_expressions (bitmap_set_t dest, bitmap_set_t orig)
{
  bitmap_set_t result = bitmap_set_new ();
  bitmap_iterator bi;
  unsigned int i;

  bitmap_and_compl (&result->expressions, &dest->expressions,
		    &orig->expressions);

  FOR_EACH_EXPR_ID_IN_SET (result, i, bi)
    {
      pre_expr expr = expression_for_id (i);
      unsigned int value_id = get_expr_value_id (expr);
      bitmap_set_bit (&result->values, value_id);
    }

  return result;
}

/* Subtract all values in bitmap set B from bitmap set A.  */

static void
bitmap_set_subtract_values (bitmap_set_t a, bitmap_set_t b)
{
  unsigned int i;
  bitmap_iterator bi;
  unsigned to_remove = -1U;
  bitmap_and_compl_into (&a->values, &b->values);
  FOR_EACH_EXPR_ID_IN_SET (a, i, bi)
    {
      if (to_remove != -1U)
	{
	  bitmap_clear_bit (&a->expressions, to_remove);
	  to_remove = -1U;
	}
      pre_expr expr = expression_for_id (i);
      if (! bitmap_bit_p (&a->values, get_expr_value_id (expr)))
	to_remove = i;
    }
  if (to_remove != -1U)
    bitmap_clear_bit (&a->expressions, to_remove);
}

/* Return true if bitmapped set SET contains the value VALUE_ID.  */

static bool
bitmap_set_contains_value (bitmap_set_t set, unsigned int value_id)
{
  if (value_id_constant_p (value_id))
    return true;

  return bitmap_bit_p (&set->values, value_id);
}

/* Return true if two bitmap sets are equal.  */

static bool
bitmap_set_equal (bitmap_set_t a, bitmap_set_t b)
{
  return bitmap_equal_p (&a->values, &b->values);
}
/* Replace an instance of EXPR's VALUE with EXPR in SET if it exists,
   and add it otherwise.  Return true if any changes were made.  */

static bool
bitmap_value_replace_in_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);
  if (value_id_constant_p (val))
    return false;

  if (bitmap_set_contains_value (set, val))
    {
      /* The number of expressions having a given value is usually
	 significantly less than the total number of expressions in SET.
	 Thus, rather than check, for each expression in SET, whether it
	 has the value LOOKFOR, we walk the reverse mapping that tells us
	 what expressions have a given value, and see if any of those
	 expressions are in our set.  For large testcases, this is about
	 5-10x faster than walking the bitmap.  If this is somehow a
	 significant loss for some cases, we can choose which set to walk
	 based on the set size.  */
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];
      EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
	{
	  if (bitmap_clear_bit (&set->expressions, i))
	    {
	      bitmap_set_bit (&set->expressions, get_expression_id (expr));
	      return i != get_expression_id (expr);
	    }
	}
      gcc_unreachable ();
    }

  bitmap_insert_into_set (set, expr);
  return true;
}
/* Insert EXPR into SET if EXPR's value is not already present in
   SET.  */

static void
bitmap_value_insert_into_set (bitmap_set_t set, pre_expr expr)
{
  unsigned int val = get_expr_value_id (expr);

  gcc_checking_assert (expr->id == get_expression_id (expr));

  /* Constant values are always considered to be part of the set.  */
  if (value_id_constant_p (val))
    return;

  /* If the value membership changed, add the expression.  */
  if (bitmap_set_bit (&set->values, val))
    bitmap_set_bit (&set->expressions, expr->id);
}
/* Print out EXPR to outfile.  */

static void
print_pre_expr (FILE *outfile, const pre_expr expr)
{
  if (! expr)
    {
      fprintf (outfile, "NULL");
      return;
    }
  switch (expr->kind)
    {
    case CONSTANT:
      print_generic_expr (outfile, PRE_EXPR_CONSTANT (expr));
      break;
    case NAME:
      print_generic_expr (outfile, PRE_EXPR_NAME (expr));
      break;
    case NARY:
      {
	unsigned int i;
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	fprintf (outfile, "{%s,", get_tree_code_name (nary->opcode));
	for (i = 0; i < nary->length; i++)
	  {
	    print_generic_expr (outfile, nary->op[i]);
	    if (i != (unsigned) nary->length - 1)
	      fprintf (outfile, ",");
	  }
	fprintf (outfile, "}");
      }
      break;

    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	print_vn_reference_ops (outfile, ref->operands);
	if (ref->vuse)
	  {
	    fprintf (outfile, "@");
	    print_generic_expr (outfile, ref->vuse);
	  }
      }
      break;
    }
}

void debug_pre_expr (pre_expr);

/* Like print_pre_expr but always prints to stderr.  */
DEBUG_FUNCTION void
debug_pre_expr (pre_expr e)
{
  print_pre_expr (stderr, e);
  fprintf (stderr, "\n");
}
/* Print out SET to OUTFILE.  */

static void
print_bitmap_set (FILE *outfile, bitmap_set_t set,
		  const char *setname, int blockindex)
{
  fprintf (outfile, "%s[%d] := { ", setname, blockindex);
  if (set)
    {
      bool first = true;
      unsigned i;
      bitmap_iterator bi;

      FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
	{
	  const pre_expr expr = expression_for_id (i);

	  if (!first)
	    fprintf (outfile, ", ");
	  first = false;
	  print_pre_expr (outfile, expr);

	  fprintf (outfile, " (%04d)", get_expr_value_id (expr));
	}
    }
  fprintf (outfile, " }\n");
}

void debug_bitmap_set (bitmap_set_t);

DEBUG_FUNCTION void
debug_bitmap_set (bitmap_set_t set)
{
  print_bitmap_set (stderr, set, "debug", 0);
}

void debug_bitmap_sets_for (basic_block);

DEBUG_FUNCTION void
debug_bitmap_sets_for (basic_block bb)
{
  print_bitmap_set (stderr, AVAIL_OUT (bb), "avail_out", bb->index);
  print_bitmap_set (stderr, EXP_GEN (bb), "exp_gen", bb->index);
  print_bitmap_set (stderr, PHI_GEN (bb), "phi_gen", bb->index);
  print_bitmap_set (stderr, TMP_GEN (bb), "tmp_gen", bb->index);
  print_bitmap_set (stderr, ANTIC_IN (bb), "antic_in", bb->index);
  if (do_partial_partial)
    print_bitmap_set (stderr, PA_IN (bb), "pa_in", bb->index);
  print_bitmap_set (stderr, NEW_SETS (bb), "new_sets", bb->index);
}

/* Print out the expressions that have VAL to OUTFILE.  */

static void
print_value_expressions (FILE *outfile, unsigned int val)
{
  bitmap set = value_expressions[val];
  if (set)
    {
      bitmap_set x;
      char s[10];
      sprintf (s, "%04d", val);
      x.expressions = *set;
      print_bitmap_set (outfile, &x, s, 0);
    }
}

DEBUG_FUNCTION void
debug_value_expressions (unsigned int val)
{
  print_value_expressions (stderr, val);
}
/* Given a CONSTANT, allocate a new CONSTANT type PRE_EXPR to
   represent it.  */

static pre_expr
get_or_alloc_expr_for_constant (tree constant)
{
  unsigned int result_id;
  struct pre_expr_d expr;
  pre_expr newexpr;

  expr.kind = CONSTANT;
  PRE_EXPR_CONSTANT (&expr) = constant;
  result_id = lookup_expression_id (&expr);
  if (result_id != 0)
    return expression_for_id (result_id);

  newexpr = pre_expr_pool.allocate ();
  newexpr->kind = CONSTANT;
  newexpr->loc = UNKNOWN_LOCATION;
  PRE_EXPR_CONSTANT (newexpr) = constant;
  alloc_expression_id (newexpr);
  newexpr->value_id = get_or_alloc_constant_value_id (constant);
  add_to_value (newexpr->value_id, newexpr);
  return newexpr;
}

/* Return the folded version of T if T, when folded, is a gimple
   min_invariant or an SSA name.  Otherwise, return T.  */

static pre_expr
fully_constant_expression (pre_expr e)
{
  switch (e->kind)
    {
    case CONSTANT:
      return e;
    case NARY:
      {
	vn_nary_op_t nary = PRE_EXPR_NARY (e);
	tree res = vn_nary_simplify (nary);
	if (!res)
	  return e;
	if (is_gimple_min_invariant (res))
	  return get_or_alloc_expr_for_constant (res);
	if (TREE_CODE (res) == SSA_NAME)
	  return get_or_alloc_expr_for_name (res);
	return e;
      }
    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (e);
	tree folded;
	if ((folded = fully_constant_vn_reference_p (ref)))
	  return get_or_alloc_expr_for_constant (folded);
	return e;
      }
    default:
      return e;
    }
}
/* Translate the VUSE backwards through phi nodes in E->dest, so that
   it has the value it would have in E->src.  Set *SAME_VALID to true
   in case the new vuse doesn't change the value id of the OPERANDS.  */

static tree
translate_vuse_through_block (vec<vn_reference_op_s> operands,
			      alias_set_type set, alias_set_type base_set,
			      tree type, tree vuse, edge e, bool *same_valid)
{
  basic_block phiblock = e->dest;
  gimple *phi = SSA_NAME_DEF_STMT (vuse);
  ao_ref ref;

  if (same_valid)
    *same_valid = true;

  /* If value-numbering provided a memory state for this
     that dominates PHIBLOCK we can just use that.  */
  if (gimple_nop_p (phi)
      || (gimple_bb (phi) != phiblock
	  && dominated_by_p (CDI_DOMINATORS, phiblock, gimple_bb (phi))))
    return vuse;

  /* We have pruned expressions that are killed in PHIBLOCK via
     prune_clobbered_mems but we have not rewritten the VUSE to the one
     live at the start of the block.  If there is no virtual PHI to translate
     through return the VUSE live at entry.  Otherwise the VUSE to translate
     is the def of the virtual PHI node.  */
  phi = get_virtual_phi (phiblock);
  if (!phi)
    return BB_LIVE_VOP_ON_EXIT
	     (get_immediate_dominator (CDI_DOMINATORS, phiblock));

  if (same_valid
      && ao_ref_init_from_vn_reference (&ref, set, base_set, type, operands))
    {
      bitmap visited = NULL;
      /* Try to find a vuse that dominates this phi node by skipping
	 non-clobbering statements.  */
      unsigned int cnt = param_sccvn_max_alias_queries_per_access;
      vuse = get_continuation_for_phi (phi, &ref, true,
				       cnt, &visited, false, NULL, NULL);
      if (visited)
	BITMAP_FREE (visited);
    }
  else
    vuse = NULL_TREE;
  /* If we didn't find any, the value ID can't stay the same.  */
  if (!vuse && same_valid)
    *same_valid = false;

  /* ??? We would like to return vuse here as this is the canonical
     upmost vdef that this reference is associated with.  But during
     insertion of the references into the hash tables we only ever
     directly insert with their direct gimple_vuse, hence returning
     something else would make us not find the other expression.  */
  return PHI_ARG_DEF (phi, e->dest_idx);
}
/* Like bitmap_find_leader, but checks for the value existing in SET1 *or*
   SET2 *or* SET3.  This is used to avoid making a set consisting of the union
   of PA_IN and ANTIC_IN during insert and phi-translation.  */

static inline pre_expr
find_leader_in_sets (unsigned int val, bitmap_set_t set1, bitmap_set_t set2,
		     bitmap_set_t set3 = NULL)
{
  pre_expr result = NULL;

  if (set1)
    result = bitmap_find_leader (set1, val);
  if (!result && set2)
    result = bitmap_find_leader (set2, val);
  if (!result && set3)
    result = bitmap_find_leader (set3, val);
  return result;
}

/* Get the tree type for our PRE expression e.  */

static tree
get_expr_type (const pre_expr e)
{
  switch (e->kind)
    {
    case NAME:
      return TREE_TYPE (PRE_EXPR_NAME (e));
    case CONSTANT:
      return TREE_TYPE (PRE_EXPR_CONSTANT (e));
    case REFERENCE:
      return PRE_EXPR_REFERENCE (e)->type;
    case NARY:
      return PRE_EXPR_NARY (e)->type;
    }
  gcc_unreachable ();
}
/* Get a representative SSA_NAME for a given expression that is available in B.
   Since all of our sub-expressions are treated as values, we require
   them to be SSA_NAME's for simplicity.
   Prior versions of GVNPRE used to use "value handles" here, so that
   an expression would be VH.11 + VH.10 instead of d_3 + e_6.  In
   either case, the operands are really values (IE we do not expect
   them to be usable without finding leaders).  */

static tree
get_representative_for (const pre_expr e, basic_block b = NULL)
{
  tree name, valnum = NULL_TREE;
  unsigned int value_id = get_expr_value_id (e);

  switch (e->kind)
    {
    case NAME:
      return PRE_EXPR_NAME (e);
    case CONSTANT:
      return PRE_EXPR_CONSTANT (e);
    case NARY:
    case REFERENCE:
      {
	/* Go through all of the expressions representing this value
	   and pick out an SSA_NAME.  */
	unsigned int i;
	bitmap_iterator bi;
	bitmap exprs = value_expressions[value_id];
	EXECUTE_IF_SET_IN_BITMAP (exprs, 0, i, bi)
	  {
	    pre_expr rep = expression_for_id (i);
	    if (rep->kind == NAME)
	      {
		tree name = PRE_EXPR_NAME (rep);
		valnum = VN_INFO (name)->valnum;
		gimple *def = SSA_NAME_DEF_STMT (name);
		/* We have to return either a new representative or one
		   that can be used for expression simplification and thus
		   is available in B.  */
		if (! b
		    || gimple_nop_p (def)
		    || dominated_by_p (CDI_DOMINATORS, b, gimple_bb (def)))
		  return name;
	      }
	    else if (rep->kind == CONSTANT)
	      return PRE_EXPR_CONSTANT (rep);
	  }
      }
      break;
    }

  /* If we reached here we couldn't find an SSA_NAME.  This can
     happen when we've discovered a value that has never appeared in
     the program as set to an SSA_NAME, as the result of phi translation.
     Create one here.
     ??? We should be able to re-use this when we insert the statement
     to compute it.  */
  name = make_temp_ssa_name (get_expr_type (e), gimple_build_nop (), "pretmp");
  vn_ssa_aux_t vn_info = VN_INFO (name);
  vn_info->value_id = value_id;
  vn_info->valnum = valnum ? valnum : name;
  vn_info->visited = true;
  /* ??? For now mark this SSA name for release by VN.  */
  vn_info->needs_insertion = true;
  add_to_value (value_id, get_or_alloc_expr_for_name (name));
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Created SSA_NAME representative ");
      print_generic_expr (dump_file, name);
      fprintf (dump_file, " for expression:");
      print_pre_expr (dump_file, e);
      fprintf (dump_file, " (%04d)\n", value_id);
    }

  return name;
}
static pre_expr
phi_translate (bitmap_set_t, pre_expr, bitmap_set_t, bitmap_set_t, edge);

/* Translate EXPR using phis in PHIBLOCK, so that it has the values of
   the phis in PRED.  Return NULL if we can't find a leader for each part
   of the translated expression.  */

static pre_expr
phi_translate_1 (bitmap_set_t dest,
		 pre_expr expr, bitmap_set_t set1, bitmap_set_t set2, edge e)
{
  basic_block pred = e->src;
  basic_block phiblock = e->dest;
  location_t expr_loc = expr->loc;
  switch (expr->kind)
    {
    case NARY:
      {
	unsigned int i;
	bool changed = false;
	vn_nary_op_t nary = PRE_EXPR_NARY (expr);
	vn_nary_op_t newnary = XALLOCAVAR (struct vn_nary_op_s,
					   sizeof_vn_nary_op (nary->length));
	memcpy (newnary, nary, sizeof_vn_nary_op (nary->length));

	for (i = 0; i < newnary->length; i++)
	  {
	    if (TREE_CODE (newnary->op[i]) != SSA_NAME)
	      continue;
	    else
	      {
		pre_expr leader, result;
		unsigned int op_val_id = VN_INFO (newnary->op[i])->value_id;
		leader = find_leader_in_sets (op_val_id, set1, set2);
		result = phi_translate (dest, leader, set1, set2, e);
		if (result)
		  /* If op has a leader in the sets we translate make
		     sure to use the value of the translated expression.
		     We might need a new representative for that.  */
		  newnary->op[i] = get_representative_for (result, pred);
		else if (!result)
		  return NULL;

		changed |= newnary->op[i] != nary->op[i];
	      }
	  }
	if (changed)
	  {
	    pre_expr constant;
	    unsigned int new_val_id;

	    PRE_EXPR_NARY (expr) = newnary;
	    constant = fully_constant_expression (expr);
	    PRE_EXPR_NARY (expr) = nary;
	    if (constant != expr)
	      {
		/* For non-CONSTANTs we have to make sure we can eventually
		   insert the expression.  Which means we need to have a
		   leader for it.  */
		if (constant->kind != CONSTANT)
		  {
		    /* Do not allow simplifications to non-constants over
		       backedges as this will likely result in a loop PHI node
		       to be inserted and increased register pressure.
		       See PR77498 - this avoids doing predcoms work in
		       a less efficient way.  */
		    if (e->flags & EDGE_DFS_BACK)
		      ;
		    else
		      {
			unsigned value_id = get_expr_value_id (constant);
			/* We want a leader in ANTIC_OUT or AVAIL_OUT here.
			   dest has what we computed into ANTIC_OUT so far
			   so pick from that - since topological sorting
			   by sorted_array_from_bitmap_set isn't perfect
			   we may lose some cases here.  */
			constant = find_leader_in_sets (value_id, dest,
							AVAIL_OUT (pred));
			if (constant)
			  {
			    if (dump_file && (dump_flags & TDF_DETAILS))
			      {
				fprintf (dump_file, "simplifying ");
				print_pre_expr (dump_file, expr);
				fprintf (dump_file, " translated %d -> %d to ",
					 phiblock->index, pred->index);
				PRE_EXPR_NARY (expr) = newnary;
				print_pre_expr (dump_file, expr);
				PRE_EXPR_NARY (expr) = nary;
				fprintf (dump_file, " to ");
				print_pre_expr (dump_file, constant);
				fprintf (dump_file, "\n");
			      }
			    return constant;
			  }
		      }
		  }
		else
		  return constant;
	      }

	    tree result = vn_nary_op_lookup_pieces (newnary->length,
						    newnary->opcode,
						    newnary->type,
						    &newnary->op[0],
						    &nary);
	    if (result && is_gimple_min_invariant (result))
	      return get_or_alloc_expr_for_constant (result);

	    if (!nary || nary->predicated_values)
	      new_val_id = 0;
	    else
	      new_val_id = nary->value_id;
	    expr = get_or_alloc_expr_for_nary (newnary, new_val_id, expr_loc);
	    add_to_value (get_expr_value_id (expr), expr);
	  }
	return expr;
      }
      break;

    case REFERENCE:
      {
	vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
	vec<vn_reference_op_s> operands = ref->operands;
	tree vuse = ref->vuse;
	tree newvuse = vuse;
	vec<vn_reference_op_s> newoperands = vNULL;
	bool changed = false, same_valid = true;
	unsigned int i, n;
	vn_reference_op_t operand;
	vn_reference_t newref;

	for (i = 0; operands.iterate (i, &operand); i++)
	  {
	    pre_expr opresult;
	    pre_expr leader;
	    tree op[3];
	    tree type = operand->type;
	    vn_reference_op_s newop = *operand;
	    op[0] = operand->op0;
	    op[1] = operand->op1;
	    op[2] = operand->op2;
	    for (n = 0; n < 3; ++n)
	      {
		unsigned int op_val_id;
		if (!op[n])
		  continue;
		if (TREE_CODE (op[n]) != SSA_NAME)
		  {
		    /* We can't possibly insert these.  */
		    if (n != 0
			&& !is_gimple_min_invariant (op[n]))
		      break;
		    continue;
		  }
		op_val_id = VN_INFO (op[n])->value_id;
		leader = find_leader_in_sets (op_val_id, set1, set2);
		opresult = phi_translate (dest, leader, set1, set2, e);
		if (opresult)
		  {
		    tree name = get_representative_for (opresult);
		    changed |= name != op[n];
		    op[n] = name;
		  }
		else if (!opresult)
		  break;
	      }
	    if (n != 3)
	      {
		newoperands.release ();
		return NULL;
	      }
	    /* When we translate a MEM_REF across a backedge and we have
	       restrict info that's not from our functions parameters
	       we have to remap it since we now may deal with a different
	       instance where the dependence info is no longer valid.
	       See PR102970.  Note instead of keeping a remapping table
	       per backedge we simply throw away restrict info.  */
	    if ((newop.opcode == MEM_REF
		 || newop.opcode == TARGET_MEM_REF)
		&& newop.clique > 1
		&& (e->flags & EDGE_DFS_BACK))
	      {
		newop.clique = 0;
		newop.base = 0;
		changed = true;
	      }
	    if (!changed)
	      continue;
	    if (!newoperands.exists ())
	      newoperands = operands.copy ();
	    /* We may have changed from an SSA_NAME to a constant.  */
	    if (newop.opcode == SSA_NAME && TREE_CODE (op[0]) != SSA_NAME)
	      newop.opcode = TREE_CODE (op[0]);
	    newop.type = type;
	    newop.op0 = op[0];
	    newop.op1 = op[1];
	    newop.op2 = op[2];
	    newoperands[i] = newop;
	  }
	gcc_checking_assert (i == operands.length ());

	if (vuse)
	  {
	    newvuse = translate_vuse_through_block (newoperands.exists ()
						    ? newoperands : operands,
						    ref->set, ref->base_set,
						    ref->type, vuse, e,
						    changed
						    ? NULL : &same_valid);
	    if (newvuse == NULL_TREE)
	      {
		newoperands.release ();
		return NULL;
	      }
	  }

	if (changed || newvuse != vuse)
	  {
	    unsigned int new_val_id;

	    tree result = vn_reference_lookup_pieces (newvuse, ref->set,
						      ref->base_set,
						      ref->type,
						      newoperands.exists ()
						      ? newoperands : operands,
						      &newref, VN_WALK);
	    if (result)
	      newoperands.release ();

	    /* We can always insert constants, so if we have a partial
	       redundant constant load of another type try to translate it
	       to a constant of appropriate type.  */
	    if (result && is_gimple_min_invariant (result))
	      {
		tree tem = result;
		if (!useless_type_conversion_p (ref->type, TREE_TYPE (result)))
		  {
		    tem = fold_unary (VIEW_CONVERT_EXPR, ref->type, result);
		    if (tem && !is_gimple_min_invariant (tem))
		      tem = NULL_TREE;
		  }
		if (tem)
		  return get_or_alloc_expr_for_constant (tem);
	      }

	    /* If we'd have to convert things we would need to validate
	       if we can insert the translated expression.  So fail
	       here for now - we cannot insert an alias with a different
	       type in the VN tables either, as that would assert.  */
	    if (result
		&& !useless_type_conversion_p (ref->type, TREE_TYPE (result)))
	      return NULL;
	    else if (!result && newref
		     && !useless_type_conversion_p (ref->type, newref->type))
	      {
		newoperands.release ();
		return NULL;
	      }

	    if (newref)
	      new_val_id = newref->value_id;
	    else
	      {
		if (changed || !same_valid)
		  new_val_id = get_next_value_id ();
		else
		  new_val_id = ref->value_id;
		if (!newoperands.exists ())
		  newoperands = operands.copy ();
		newref = vn_reference_insert_pieces (newvuse, ref->set,
						     ref->base_set,
						     ref->offset, ref->max_size,
						     ref->type, newoperands,
						     result, new_val_id);
		newoperands = vNULL;
	      }
	    expr = get_or_alloc_expr_for_reference (newref, expr_loc);
	    add_to_value (new_val_id, expr);
	  }
	newoperands.release ();
	return expr;
      }
      break;

    case NAME:
      {
	tree name = PRE_EXPR_NAME (expr);
	gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	/* If the SSA name is defined by a PHI node in this block,
	   translate it.  */
	if (gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == phiblock)
	  {
	    tree def = PHI_ARG_DEF (def_stmt, e->dest_idx);

	    /* Handle constant.  */
	    if (is_gimple_min_invariant (def))
	      return get_or_alloc_expr_for_constant (def);

	    return get_or_alloc_expr_for_name (def);
	  }
	/* Otherwise return it unchanged - it will get removed if its
	   value is not available in PRED's AVAIL_OUT set of expressions
	   by the subtraction of TMP_GEN.  */
	return expr;
      }

    default:
      gcc_unreachable ();
    }
}
/* Wrapper around phi_translate_1 providing caching functionality.  */

static pre_expr
phi_translate (bitmap_set_t dest, pre_expr expr,
	       bitmap_set_t set1, bitmap_set_t set2, edge e)
{
  expr_pred_trans_t slot = NULL;
  pre_expr phitrans;

  if (!expr)
    return NULL;

  /* Constants contain no values that need translation.  */
  if (expr->kind == CONSTANT)
    return expr;

  if (value_id_constant_p (get_expr_value_id (expr)))
    return expr;

  /* Don't add translations of NAMEs as those are cheap to translate.  */
  if (expr->kind != NAME)
    {
      if (phi_trans_add (&slot, expr, e->src))
	return slot->v == 0 ? NULL : expression_for_id (slot->v);
      /* Store NULL for the value we want to return in the case of
	 recursing.  */
      slot->v = 0;
    }

  /* Translate.  */
  basic_block saved_valueize_bb = vn_context_bb;
  vn_context_bb = e->src;
  phitrans = phi_translate_1 (dest, expr, set1, set2, e);
  vn_context_bb = saved_valueize_bb;

  if (slot)
    {
      /* We may have reallocated.  */
      phi_trans_add (&slot, expr, e->src);
      if (phitrans)
	slot->v = get_expression_id (phitrans);
      else
	/* Remove failed translations again, they cause insert
	   iteration to not pick up new opportunities reliably.  */
	PHI_TRANS_TABLE (e->src)->clear_slot (slot);
    }

  return phitrans;
}
/* For each expression in SET, translate the values through phi nodes
   in PHIBLOCK using edge PHIBLOCK->PRED, and store the resulting
   expressions in DEST.  */

static void
phi_translate_set (bitmap_set_t dest, bitmap_set_t set, edge e)
{
  bitmap_iterator bi;
  unsigned int i;

  if (gimple_seq_empty_p (phi_nodes (e->dest)))
    {
      bitmap_set_copy (dest, set);
      return;
    }

  /* Allocate the phi-translation cache where we have an idea about
     its size.  hash-table implementation internals tell us that
     allocating the table to fit twice the number of elements will
     make sure we do not usually re-allocate.  */
  if (!PHI_TRANS_TABLE (e->src))
    PHI_TRANS_TABLE (e->src) = new hash_table<expr_pred_trans_d>
	(2 * bitmap_count_bits (&set->expressions));
  FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
    {
      pre_expr expr = expression_for_id (i);
      pre_expr translated = phi_translate (dest, expr, set, NULL, e);
      if (!translated)
	continue;

      bitmap_insert_into_set (dest, translated);
    }
}
/* Find the leader for a value (i.e., the name representing that
   value) in a given set, and return it.  Return NULL if no leader
   is found.  */

static pre_expr
bitmap_find_leader (bitmap_set_t set, unsigned int val)
{
  if (value_id_constant_p (val))
    return constant_value_expressions[-val];

  if (bitmap_set_contains_value (set, val))
    {
      /* Rather than walk the entire bitmap of expressions, and see
	 whether any of them has the value we are looking for, we look
	 at the reverse mapping, which tells us the set of expressions
	 that have a given value (IE value->expressions with that
	 value) and see if any of those expressions are in our set.
	 The number of expressions per value is usually significantly
	 less than the number of expressions in the set.  In fact, for
	 large testcases, doing it this way is roughly 5-10x faster
	 than walking the bitmap.
	 If this is somehow a significant loss for some cases, we can
	 choose which set to walk based on which set is smaller.  */
      unsigned int i;
      bitmap_iterator bi;
      bitmap exprset = value_expressions[val];

      if (!exprset->first->next)
	EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
	  if (bitmap_bit_p (&set->expressions, i))
	    return expression_for_id (i);

      EXECUTE_IF_AND_IN_BITMAP (exprset, &set->expressions, 0, i, bi)
	return expression_for_id (i);
    }
  return NULL;
}
/* Determine if EXPR, a memory expression, is ANTIC_IN at the top of
   BLOCK by seeing if it is not killed in the block.  Note that we are
   only determining whether there is a store that kills it.  Because
   of the order in which clean iterates over values, we are guaranteed
   that altered operands will have caused us to be eliminated from the
   ANTIC_IN set already.  */
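
/* For instance (a sketch with invented names): if EXPR is the load
   {*q_3, VUSE .MEM_4} and BLOCK contains "*p_1 = x_2;" where p_1 and
   q_3 may alias, the store may clobber the location, so the
   expression dies in BLOCK and must not survive into ANTIC_IN of
   BLOCK.  */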
1841 static bool
1842 value_dies_in_block_x (pre_expr expr, basic_block block)
1844 tree vuse = PRE_EXPR_REFERENCE (expr)->vuse;
1845 vn_reference_t refx = PRE_EXPR_REFERENCE (expr);
1846 gimple *def;
1847 gimple_stmt_iterator gsi;
1848 unsigned id = get_expression_id (expr);
1849 bool res = false;
1850 ao_ref ref;
1852 if (!vuse)
1853 return false;
1855 /* Lookup a previously calculated result. */
1856 if (EXPR_DIES (block)
1857 && bitmap_bit_p (EXPR_DIES (block), id * 2))
1858 return bitmap_bit_p (EXPR_DIES (block), id * 2 + 1);
1860 /* A memory expression {e, VUSE} dies in the block if there is a
1861 statement that may clobber e. If, starting statement walk from the
1862 top of the basic block, a statement uses VUSE there can be no kill
1863 inbetween that use and the original statement that loaded {e, VUSE},
1864 so we can stop walking. */
1865 ref.base = NULL_TREE;
1866 for (gsi = gsi_start_bb (block); !gsi_end_p (gsi); gsi_next (&gsi))
1868 tree def_vuse, def_vdef;
1869 def = gsi_stmt (gsi);
1870 def_vuse = gimple_vuse (def);
1871 def_vdef = gimple_vdef (def);
1873 /* Not a memory statement. */
1874 if (!def_vuse)
1875 continue;
1877 /* Not a may-def. */
1878 if (!def_vdef)
1880 /* A load with the same VUSE, we're done. */
1881 if (def_vuse == vuse)
1882 break;
1884 continue;
1887 /* Init ref only if we really need it. */
1888 if (ref.base == NULL_TREE
1889 && !ao_ref_init_from_vn_reference (&ref, refx->set, refx->base_set,
1890 refx->type, refx->operands))
1892 res = true;
1893 break;
1895 /* If the statement may clobber expr, it dies. */
1896 if (stmt_may_clobber_ref_p_1 (def, &ref))
1898 res = true;
1899 break;
1903 /* Remember the result. */
1904 if (!EXPR_DIES (block))
1905 EXPR_DIES (block) = BITMAP_ALLOC (&grand_bitmap_obstack);
1906 bitmap_set_bit (EXPR_DIES (block), id * 2);
1907 if (res)
1908 bitmap_set_bit (EXPR_DIES (block), id * 2 + 1);
1910 return res;
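/* A note on the EXPR_DIES cache used above, which encodes two bits per
   expression id:

     bit 2*id      - the answer for ID in BLOCK has been computed
     bit 2*id + 1  - the expression was found to die in BLOCK

   so a repeated query costs two bitmap_bit_p tests instead of another
   walk over the statements of BLOCK.  */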
1914 /* Determine if OP is valid in SET1 U SET2, which it is when the union
1915 contains its value-id. */
1917 static bool
1918 op_valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, tree op)
1920 if (op && TREE_CODE (op) == SSA_NAME)
1922 unsigned int value_id = VN_INFO (op)->value_id;
1923 if (!(bitmap_set_contains_value (set1, value_id)
1924 || (set2 && bitmap_set_contains_value (set2, value_id))))
1925 return false;
1927 return true;
1930 /* Determine if the expression EXPR is valid in SET1 U SET2.
1931 ONLY SET2 CAN BE NULL.
1932 This means that we have a leader for each part of the expression
1933 (if it consists of values), or the expression is an SSA_NAME.
1934 For loads/calls, we also see if the vuse is killed in this block. */
1936 static bool
1937 valid_in_sets (bitmap_set_t set1, bitmap_set_t set2, pre_expr expr)
1939 switch (expr->kind)
1941 case NAME:
1942 /* By construction all NAMEs are available. Non-available
1943 NAMEs are removed by subtracting TMP_GEN from the sets. */
1944 return true;
1945 case NARY:
1947 unsigned int i;
1948 vn_nary_op_t nary = PRE_EXPR_NARY (expr);
1949 for (i = 0; i < nary->length; i++)
1950 if (!op_valid_in_sets (set1, set2, nary->op[i]))
1951 return false;
1952 return true;
1954 break;
1955 case REFERENCE:
1957 vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
1958 vn_reference_op_t vro;
1959 unsigned int i;
1961 FOR_EACH_VEC_ELT (ref->operands, i, vro)
1963 if (!op_valid_in_sets (set1, set2, vro->op0)
1964 || !op_valid_in_sets (set1, set2, vro->op1)
1965 || !op_valid_in_sets (set1, set2, vro->op2))
1966 return false;
1968 return true;
1970 default:
1971 gcc_unreachable ();
1975 /* Clean the set of expressions SET1 that are no longer valid in SET1 or SET2.
1976 This means expressions that are made up of values we have no leaders for
1977 in SET1 or SET2. */
1979 static void
1980 clean (bitmap_set_t set1, bitmap_set_t set2 = NULL)
1982 vec<pre_expr> exprs = sorted_array_from_bitmap_set (set1);
1983 pre_expr expr;
1984 int i;
1986 FOR_EACH_VEC_ELT (exprs, i, expr)
1988 if (!valid_in_sets (set1, set2, expr))
1990 unsigned int val = get_expr_value_id (expr);
1991 bitmap_clear_bit (&set1->expressions, get_expression_id (expr));
1992 /* We are entered with possibly multiple expressions for a value,
1993 so before removing a value from the set, see if there's an
1994 expression left for it. */
1995 if (! bitmap_find_leader (set1, val))
1996 bitmap_clear_bit (&set1->values, val);
1999 exprs.release ();
2001 if (flag_checking)
2003 unsigned j;
2004 bitmap_iterator bi;
2005 FOR_EACH_EXPR_ID_IN_SET (set1, j, bi)
2006 gcc_assert (valid_in_sets (set1, set2, expression_for_id (j)));
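/* For illustration, a hypothetical cleaning: assume SET1 contains the
   NARY {plus_expr, a_1, b_2} but the value of b_2 has no leader left
   in SET1 or SET2.  valid_in_sets then fails via op_valid_in_sets on
   b_2, the expression bit is cleared, and if no other expression for
   its value remains in SET1 the value bit is cleared as well.  */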
2010 /* Clean the set of expressions that are no longer valid in SET because
2011 they are clobbered in BLOCK or because they trap and may not be executed.
2012 When CLEAN_TRAPS is true remove all possibly trapping expressions. */
2014 static void
2015 prune_clobbered_mems (bitmap_set_t set, basic_block block, bool clean_traps)
2017 bitmap_iterator bi;
2018 unsigned i;
2019 unsigned to_remove = -1U;
2020 bool any_removed = false;
2022 FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
2024 /* Remove queued expr. */
2025 if (to_remove != -1U)
2027 bitmap_clear_bit (&set->expressions, to_remove);
2028 any_removed = true;
2029 to_remove = -1U;
2032 pre_expr expr = expression_for_id (i);
2033 if (expr->kind == REFERENCE)
2035 vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
2036 if (ref->vuse)
2038 gimple *def_stmt = SSA_NAME_DEF_STMT (ref->vuse);
2039 if (!gimple_nop_p (def_stmt)
2040 /* If value-numbering provided a memory state for this
2041 that dominates BLOCK we're done, otherwise we have
2042 to check if the value dies in BLOCK. */
2043 && !(gimple_bb (def_stmt) != block
2044 && dominated_by_p (CDI_DOMINATORS,
2045 block, gimple_bb (def_stmt)))
2046 && value_dies_in_block_x (expr, block))
2047 to_remove = i;
2049 /* If the REFERENCE may trap make sure the block does not contain
2050 a possible exit point.
2051 ??? This is overly conservative if we translate AVAIL_OUT
2052 as the available expression might be after the exit point. */
2053 if ((BB_MAY_NOTRETURN (block) || clean_traps)
2054 && vn_reference_may_trap (ref))
2055 to_remove = i;
2057 else if (expr->kind == NARY)
2059 vn_nary_op_t nary = PRE_EXPR_NARY (expr);
2060 /* If the NARY may trap make sure the block does not contain
2061 a possible exit point.
2062 ??? This is overly conservative if we translate AVAIL_OUT
2063 as the available expression might be after the exit point. */
2064 if ((BB_MAY_NOTRETURN (block) || clean_traps)
2065 && vn_nary_may_trap (nary))
2066 to_remove = i;
2070 /* Remove queued expr. */
2071 if (to_remove != -1U)
2073 bitmap_clear_bit (&set->expressions, to_remove);
2074 any_removed = true;
2077 /* Above we only removed expressions, now clean the set of values
2078 which no longer have any corresponding expression. We cannot
2079 clear the value at the time we remove an expression since there
2080 may be multiple expressions per value.
2081 If we queued the values that are possibly to be removed we could
2082 use bitmap_find_leader to see whether an expression still exists
2083 for each of them. For some ratio of to-be-removed values to the
2084 number of values/expressions in the set this might be faster than
2085 rebuilding the value set. */
2086 if (any_removed)
2088 bitmap_clear (&set->values);
2089 FOR_EACH_EXPR_ID_IN_SET (set, i, bi)
2091 pre_expr expr = expression_for_id (i);
2092 unsigned int value_id = get_expr_value_id (expr);
2093 bitmap_set_bit (&set->values, value_id);
2098 /* Compute the ANTIC set for BLOCK.
2100 If succs(BLOCK) > 1 then
2101 ANTIC_OUT[BLOCK] = intersection of ANTIC_IN[b] for all succ(BLOCK)
2102 else if succs(BLOCK) == 1 then
2103 ANTIC_OUT[BLOCK] = phi_translate (ANTIC_IN[succ(BLOCK)])
2105 ANTIC_IN[BLOCK] = clean(ANTIC_OUT[BLOCK] U EXP_GEN[BLOCK] - TMP_GEN[BLOCK])
2107 Note that clean() is deferred until after the iteration. */
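/* For illustration, the equations above worked on a diamond

	    B1
	   /  \
	 B2    B3
	   \  /
	    B4		where only B4 computes a + b

   ANTIC_OUT[B2] and ANTIC_OUT[B3] are phi translations of ANTIC_IN[B4]
   (single successor each), and ANTIC_OUT[B1] is the intersection of
   ANTIC_IN[B2] and ANTIC_IN[B3].  So a + b becomes anticipated in B2,
   B3 and, unless one of them clobbers or redefines an operand, in B1
   as well - which is what later lets insertion make the computation
   fully redundant.  */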
2109 static bool
2110 compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
2112 bitmap_set_t S, old, ANTIC_OUT;
2113 edge e;
2114 edge_iterator ei;
2116 bool was_visited = BB_VISITED (block);
2117 bool changed = ! BB_VISITED (block);
2118 bool any_max_on_edge = false;
2120 BB_VISITED (block) = 1;
2121 old = ANTIC_OUT = S = NULL;
2123 /* If any edges from predecessors are abnormal, antic_in is empty,
2124 so do nothing. */
2125 if (block_has_abnormal_pred_edge)
2126 goto maybe_dump_sets;
2128 old = ANTIC_IN (block);
2129 ANTIC_OUT = bitmap_set_new ();
2131 /* If the block has no successors, ANTIC_OUT is empty. */
2132 if (EDGE_COUNT (block->succs) == 0)
2134 /* If we have one successor, we could have some phi nodes to
2135 translate through. */
2136 else if (single_succ_p (block))
2138 e = single_succ_edge (block);
2139 gcc_assert (BB_VISITED (e->dest));
2140 phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e);
2142 /* If we have multiple successors, we take the intersection of all of
2143 them. Note that in the case of loop exit phi nodes, we may have
2144 phis to translate through. */
2145 else
2147 size_t i;
2148 edge first = NULL;
2150 auto_vec<edge> worklist (EDGE_COUNT (block->succs));
2151 FOR_EACH_EDGE (e, ei, block->succs)
2153 if (!first
2154 && BB_VISITED (e->dest))
2155 first = e;
2156 else if (BB_VISITED (e->dest))
2157 worklist.quick_push (e);
2158 else
2160 /* Unvisited successors get their ANTIC_IN replaced by the
2161 maximal set to arrive at a maximum ANTIC_IN solution.
2162 We can ignore them in the intersection operation and thus
2163 need not explicitly represent that maximum solution. */
2164 any_max_on_edge = true;
2165 if (dump_file && (dump_flags & TDF_DETAILS))
2166 fprintf (dump_file, "ANTIC_IN is MAX on %d->%d\n",
2167 e->src->index, e->dest->index);
2171 /* With multiple successors we must have visited at least one
2172 already, which is guaranteed by the iteration order. */
2173 gcc_assert (first != NULL);
2175 phi_translate_set (ANTIC_OUT, ANTIC_IN (first->dest), first);
2177 /* If we have multiple successors we need to intersect the ANTIC_OUT
2178 sets. For values that's a simple intersection but for
2179 expressions it is a union. Given we want to have a single
2180 expression per value in our sets we have to canonicalize.
2181 Avoid randomness and running into cycles like for PR82129 and
2182 canonicalize the expression we choose to the one with the
2183 lowest id. This requires we actually compute the union first. */
2184 FOR_EACH_VEC_ELT (worklist, i, e)
2186 if (!gimple_seq_empty_p (phi_nodes (e->dest)))
2188 bitmap_set_t tmp = bitmap_set_new ();
2189 phi_translate_set (tmp, ANTIC_IN (e->dest), e);
2190 bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
2191 bitmap_ior_into (&ANTIC_OUT->expressions, &tmp->expressions);
2192 bitmap_set_free (tmp);
2194 else
2196 bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (e->dest)->values);
2197 bitmap_ior_into (&ANTIC_OUT->expressions,
2198 &ANTIC_IN (e->dest)->expressions);
2201 if (! worklist.is_empty ())
2203 /* Prune expressions not in the value set. */
2204 bitmap_iterator bi;
2205 unsigned int i;
2206 unsigned int to_clear = -1U;
2207 FOR_EACH_EXPR_ID_IN_SET (ANTIC_OUT, i, bi)
2209 if (to_clear != -1U)
2211 bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
2212 to_clear = -1U;
2214 pre_expr expr = expression_for_id (i);
2215 unsigned int value_id = get_expr_value_id (expr);
2216 if (!bitmap_bit_p (&ANTIC_OUT->values, value_id))
2217 to_clear = i;
2219 if (to_clear != -1U)
2220 bitmap_clear_bit (&ANTIC_OUT->expressions, to_clear);
2224 /* Dump ANTIC_OUT before it's pruned. */
2225 if (dump_file && (dump_flags & TDF_DETAILS))
2226 print_bitmap_set (dump_file, ANTIC_OUT, "ANTIC_OUT", block->index);
2228 /* Prune expressions that are clobbered in block and thus become
2229 invalid if translated from ANTIC_OUT to ANTIC_IN. */
2230 prune_clobbered_mems (ANTIC_OUT, block, any_max_on_edge);
2232 /* Generate ANTIC_OUT - TMP_GEN. */
2233 S = bitmap_set_subtract_expressions (ANTIC_OUT, TMP_GEN (block));
2235 /* Start ANTIC_IN with EXP_GEN - TMP_GEN. */
2236 ANTIC_IN (block) = bitmap_set_subtract_expressions (EXP_GEN (block),
2237 TMP_GEN (block));
2239 /* Then union in the ANTIC_OUT - TMP_GEN values,
2240 to get ANTIC_OUT U EXP_GEN - TMP_GEN */
2241 bitmap_ior_into (&ANTIC_IN (block)->values, &S->values);
2242 bitmap_ior_into (&ANTIC_IN (block)->expressions, &S->expressions);
2244 /* clean (ANTIC_IN (block)) is deferred until after the iteration has
2245 converged because it can cause non-convergence, see for example PR81181. */
2247 /* Intersect ANTIC_IN with the old ANTIC_IN. This is required until
2248 we properly represent the maximum expression set, thus not prune
2249 values without expressions during the iteration. */
2250 if (was_visited
2251 && bitmap_and_into (&ANTIC_IN (block)->values, &old->values))
2253 if (dump_file && (dump_flags & TDF_DETAILS))
2254 fprintf (dump_file, "warning: intersecting with old ANTIC_IN "
2255 "shrinks the set\n");
2256 /* Prune expressions not in the value set. */
2257 bitmap_iterator bi;
2258 unsigned int i;
2259 unsigned int to_clear = -1U;
2260 FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (block), i, bi)
2262 if (to_clear != -1U)
2264 bitmap_clear_bit (&ANTIC_IN (block)->expressions, to_clear);
2265 to_clear = -1U;
2267 pre_expr expr = expression_for_id (i);
2268 unsigned int value_id = get_expr_value_id (expr);
2269 if (!bitmap_bit_p (&ANTIC_IN (block)->values, value_id))
2270 to_clear = i;
2272 if (to_clear != -1U)
2273 bitmap_clear_bit (&ANTIC_IN (block)->expressions, to_clear);
2276 if (!bitmap_set_equal (old, ANTIC_IN (block)))
2277 changed = true;
2279 maybe_dump_sets:
2280 if (dump_file && (dump_flags & TDF_DETAILS))
2282 if (changed)
2283 fprintf (dump_file, "[changed] ");
2284 print_bitmap_set (dump_file, ANTIC_IN (block), "ANTIC_IN",
2285 block->index);
2287 if (S)
2288 print_bitmap_set (dump_file, S, "S", block->index);
2290 if (old)
2291 bitmap_set_free (old);
2292 if (S)
2293 bitmap_set_free (S);
2294 if (ANTIC_OUT)
2295 bitmap_set_free (ANTIC_OUT);
2296 return changed;
2299 /* Compute PARTIAL_ANTIC for BLOCK.
2301 If succs(BLOCK) > 1 then
2302 PA_OUT[BLOCK] = value-wise union of PA_IN[b] + all ANTIC_IN not
2303 in ANTIC_OUT for all succ(BLOCK)
2304 else if succs(BLOCK) == 1 then
2305 PA_OUT[BLOCK] = phi_translate (PA_IN[succ(BLOCK)])
2307 PA_IN[BLOCK] = clean(PA_OUT[BLOCK] - TMP_GEN[BLOCK] - ANTIC_IN[BLOCK])
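/* For illustration, contrast with full ANTIC above: because PA_OUT is
   a union, a value computed on only the B2 path of the earlier diamond
   is still partially anticipatable in B1, while the final subtraction
   of ANTIC_IN[BLOCK] keeps PA_IN disjoint from the fully anticipated
   values.  */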
2310 static void
2311 compute_partial_antic_aux (basic_block block,
2312 bool block_has_abnormal_pred_edge)
2314 bitmap_set_t old_PA_IN;
2315 bitmap_set_t PA_OUT;
2316 edge e;
2317 edge_iterator ei;
2318 unsigned long max_pa = param_max_partial_antic_length;
2320 old_PA_IN = PA_OUT = NULL;
2322 /* If any edges from predecessors are abnormal, antic_in is empty,
2323 so do nothing. */
2324 if (block_has_abnormal_pred_edge)
2325 goto maybe_dump_sets;
2327 /* If there are too many partially anticipatable values in the
2328 block, phi_translate_set can take an exponential time: stop
2329 before the translation starts. */
2330 if (max_pa
2331 && single_succ_p (block)
2332 && bitmap_count_bits (&PA_IN (single_succ (block))->values) > max_pa)
2333 goto maybe_dump_sets;
2335 old_PA_IN = PA_IN (block);
2336 PA_OUT = bitmap_set_new ();
2338 /* If the block has no successors, ANTIC_OUT is empty. */
2339 if (EDGE_COUNT (block->succs) == 0)
2341 /* If we have one successor, we could have some phi nodes to
2342 translate through. Note that we can't phi translate across DFS
2343 back edges in partial antic, because it uses a union operation on
2344 the successors. For recurrences like IVs, we will end up
2345 generating a new value in the set on each go around (i + 3 (VH.1),
2346 VH.1 + 1 (VH.2), VH.2 + 1 (VH.3), etc.), forever. */
2347 else if (single_succ_p (block))
2349 e = single_succ_edge (block);
2350 if (!(e->flags & EDGE_DFS_BACK))
2351 phi_translate_set (PA_OUT, PA_IN (e->dest), e);
2353 /* If we have multiple successors, we take the union of all of
2354 them. */
2355 else
2357 size_t i;
2359 auto_vec<edge> worklist (EDGE_COUNT (block->succs));
2360 FOR_EACH_EDGE (e, ei, block->succs)
2362 if (e->flags & EDGE_DFS_BACK)
2363 continue;
2364 worklist.quick_push (e);
2366 if (worklist.length () > 0)
2368 FOR_EACH_VEC_ELT (worklist, i, e)
2370 unsigned int i;
2371 bitmap_iterator bi;
2373 if (!gimple_seq_empty_p (phi_nodes (e->dest)))
2375 bitmap_set_t antic_in = bitmap_set_new ();
2376 phi_translate_set (antic_in, ANTIC_IN (e->dest), e);
2377 FOR_EACH_EXPR_ID_IN_SET (antic_in, i, bi)
2378 bitmap_value_insert_into_set (PA_OUT,
2379 expression_for_id (i));
2380 bitmap_set_free (antic_in);
2381 bitmap_set_t pa_in = bitmap_set_new ();
2382 phi_translate_set (pa_in, PA_IN (e->dest), e);
2383 FOR_EACH_EXPR_ID_IN_SET (pa_in, i, bi)
2384 bitmap_value_insert_into_set (PA_OUT,
2385 expression_for_id (i));
2386 bitmap_set_free (pa_in);
2388 else
2390 FOR_EACH_EXPR_ID_IN_SET (ANTIC_IN (e->dest), i, bi)
2391 bitmap_value_insert_into_set (PA_OUT,
2392 expression_for_id (i));
2393 FOR_EACH_EXPR_ID_IN_SET (PA_IN (e->dest), i, bi)
2394 bitmap_value_insert_into_set (PA_OUT,
2395 expression_for_id (i));
2401 /* Prune expressions that are clobbered in block and thus become
2402 invalid if translated from PA_OUT to PA_IN. */
2403 prune_clobbered_mems (PA_OUT, block, false);
2405 /* PA_IN starts with PA_OUT - TMP_GEN.
2406 Then we subtract things from ANTIC_IN. */
2407 PA_IN (block) = bitmap_set_subtract_expressions (PA_OUT, TMP_GEN (block));
2409 /* For partial antic, we want to put back in the phi results, since
2410 we will properly avoid making them partially antic over backedges. */
2411 bitmap_ior_into (&PA_IN (block)->values, &PHI_GEN (block)->values);
2412 bitmap_ior_into (&PA_IN (block)->expressions, &PHI_GEN (block)->expressions);
2414 /* PA_IN[block] = PA_IN[block] - ANTIC_IN[block] */
2415 bitmap_set_subtract_values (PA_IN (block), ANTIC_IN (block));
2417 clean (PA_IN (block), ANTIC_IN (block));
2419 maybe_dump_sets:
2420 if (dump_file && (dump_flags & TDF_DETAILS))
2422 if (PA_OUT)
2423 print_bitmap_set (dump_file, PA_OUT, "PA_OUT", block->index);
2425 print_bitmap_set (dump_file, PA_IN (block), "PA_IN", block->index);
2427 if (old_PA_IN)
2428 bitmap_set_free (old_PA_IN);
2429 if (PA_OUT)
2430 bitmap_set_free (PA_OUT);
2433 /* Compute ANTIC and partial ANTIC sets. */
2435 static void
2436 compute_antic (void)
2438 bool changed = true;
2439 int num_iterations = 0;
2440 basic_block block;
2441 int i;
2442 edge_iterator ei;
2443 edge e;
2445 /* If any predecessor edges are abnormal, we punt, so antic_in is empty.
2446 We pre-build the map of blocks with incoming abnormal edges here. */
2447 auto_sbitmap has_abnormal_preds (last_basic_block_for_fn (cfun));
2448 bitmap_clear (has_abnormal_preds);
2450 FOR_ALL_BB_FN (block, cfun)
2452 BB_VISITED (block) = 0;
2454 FOR_EACH_EDGE (e, ei, block->preds)
2455 if (e->flags & EDGE_ABNORMAL)
2457 bitmap_set_bit (has_abnormal_preds, block->index);
2458 break;
2461 /* While we are here, give empty ANTIC_IN sets to each block. */
2462 ANTIC_IN (block) = bitmap_set_new ();
2463 if (do_partial_partial)
2464 PA_IN (block) = bitmap_set_new ();
2467 /* At the exit block we anticipate nothing. */
2468 BB_VISITED (EXIT_BLOCK_PTR_FOR_FN (cfun)) = 1;
2470 /* For ANTIC computation we need a postorder that also guarantees that
2471 a block with a single successor is visited after its successor.
2472 RPO on the inverted CFG has this property. */
2473 int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2474 int n = inverted_rev_post_order_compute (cfun, rpo);
2476 auto_sbitmap worklist (last_basic_block_for_fn (cfun) + 1);
2477 bitmap_clear (worklist);
2478 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
2479 bitmap_set_bit (worklist, e->src->index);
2480 while (changed)
2482 if (dump_file && (dump_flags & TDF_DETAILS))
2483 fprintf (dump_file, "Starting iteration %d\n", num_iterations);
2484 /* ??? We need to clear our PHI translation cache here as the
2485 ANTIC sets shrink and we restrict valid translations to
2486 those having operands with leaders in ANTIC. Same below
2487 for PA ANTIC computation. */
2488 num_iterations++;
2489 changed = false;
2490 for (i = 0; i < n; ++i)
2492 if (bitmap_bit_p (worklist, rpo[i]))
2494 basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
2495 bitmap_clear_bit (worklist, block->index);
2496 if (compute_antic_aux (block,
2497 bitmap_bit_p (has_abnormal_preds,
2498 block->index)))
2500 FOR_EACH_EDGE (e, ei, block->preds)
2501 bitmap_set_bit (worklist, e->src->index);
2502 changed = true;
2506 /* Theoretically possible, but *highly* unlikely. */
2507 gcc_checking_assert (num_iterations < 500);
2510 /* We have to clean after the dataflow problem converged as cleaning
2511 can cause non-convergence because it is based on expressions
2512 rather than values. */
2513 FOR_EACH_BB_FN (block, cfun)
2514 clean (ANTIC_IN (block));
2516 statistics_histogram_event (cfun, "compute_antic iterations",
2517 num_iterations);
2519 if (do_partial_partial)
2521 /* For partial antic we ignore backedges and thus we do not need
2522 to perform any iteration when we process blocks in rpo. */
2523 for (i = 0; i < n; ++i)
2525 basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
2526 compute_partial_antic_aux (block,
2527 bitmap_bit_p (has_abnormal_preds,
2528 block->index));
2532 free (rpo);
2536 /* Inserted expressions are placed onto this worklist, which is used
2537 for performing quick dead code elimination of insertions we made
2538 that didn't turn out to be necessary. */
2539 static bitmap inserted_exprs;
2541 /* The actual worker for create_component_ref_by_pieces. */
2543 static tree
2544 create_component_ref_by_pieces_1 (basic_block block, vn_reference_t ref,
2545 unsigned int *operand, gimple_seq *stmts)
2547 vn_reference_op_t currop = &ref->operands[*operand];
2548 tree genop;
2549 ++*operand;
2550 switch (currop->opcode)
2552 case CALL_EXPR:
2553 gcc_unreachable ();
2555 case MEM_REF:
2557 tree baseop = create_component_ref_by_pieces_1 (block, ref, operand,
2558 stmts);
2559 if (!baseop)
2560 return NULL_TREE;
2561 tree offset = currop->op0;
2562 if (TREE_CODE (baseop) == ADDR_EXPR
2563 && handled_component_p (TREE_OPERAND (baseop, 0)))
2565 poly_int64 off;
2566 tree base;
2567 base = get_addr_base_and_unit_offset (TREE_OPERAND (baseop, 0),
2568 &off);
2569 gcc_assert (base);
2570 offset = int_const_binop (PLUS_EXPR, offset,
2571 build_int_cst (TREE_TYPE (offset),
2572 off));
2573 baseop = build_fold_addr_expr (base);
2575 genop = build2 (MEM_REF, currop->type, baseop, offset);
2576 MR_DEPENDENCE_CLIQUE (genop) = currop->clique;
2577 MR_DEPENDENCE_BASE (genop) = currop->base;
2578 REF_REVERSE_STORAGE_ORDER (genop) = currop->reverse;
2579 return genop;
2582 case TARGET_MEM_REF:
2584 tree genop0 = NULL_TREE, genop1 = NULL_TREE;
2585 vn_reference_op_t nextop = &ref->operands[(*operand)++];
2586 tree baseop = create_component_ref_by_pieces_1 (block, ref, operand,
2587 stmts);
2588 if (!baseop)
2589 return NULL_TREE;
2590 if (currop->op0)
2592 genop0 = find_or_generate_expression (block, currop->op0, stmts);
2593 if (!genop0)
2594 return NULL_TREE;
2596 if (nextop->op0)
2598 genop1 = find_or_generate_expression (block, nextop->op0, stmts);
2599 if (!genop1)
2600 return NULL_TREE;
2602 genop = build5 (TARGET_MEM_REF, currop->type,
2603 baseop, currop->op2, genop0, currop->op1, genop1);
2605 MR_DEPENDENCE_CLIQUE (genop) = currop->clique;
2606 MR_DEPENDENCE_BASE (genop) = currop->base;
2607 return genop;
2610 case ADDR_EXPR:
2611 if (currop->op0)
2613 gcc_assert (is_gimple_min_invariant (currop->op0));
2614 return currop->op0;
2616 /* Fallthrough. */
2617 case REALPART_EXPR:
2618 case IMAGPART_EXPR:
2619 case VIEW_CONVERT_EXPR:
2621 tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
2622 stmts);
2623 if (!genop0)
2624 return NULL_TREE;
2625 return build1 (currop->opcode, currop->type, genop0);
2628 case WITH_SIZE_EXPR:
2630 tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
2631 stmts);
2632 if (!genop0)
2633 return NULL_TREE;
2634 tree genop1 = find_or_generate_expression (block, currop->op0, stmts);
2635 if (!genop1)
2636 return NULL_TREE;
2637 return build2 (currop->opcode, currop->type, genop0, genop1);
2640 case BIT_FIELD_REF:
2642 tree genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
2643 stmts);
2644 if (!genop0)
2645 return NULL_TREE;
2646 tree op1 = currop->op0;
2647 tree op2 = currop->op1;
2648 tree t = build3 (BIT_FIELD_REF, currop->type, genop0, op1, op2);
2649 REF_REVERSE_STORAGE_ORDER (t) = currop->reverse;
2650 return t;
2653 /* For array ref vn_reference_op's, operand 1 of the array ref
2654 is op0 of the reference op and operand 3 of the array ref is
2655 op1. */
2656 case ARRAY_RANGE_REF:
2657 case ARRAY_REF:
2659 tree genop0;
2660 tree genop1 = currop->op0;
2661 tree genop2 = currop->op1;
2662 tree genop3 = currop->op2;
2663 genop0 = create_component_ref_by_pieces_1 (block, ref, operand,
2664 stmts);
2665 if (!genop0)
2666 return NULL_TREE;
2667 genop1 = find_or_generate_expression (block, genop1, stmts);
2668 if (!genop1)
2669 return NULL_TREE;
2670 if (genop2)
2672 tree domain_type = TYPE_DOMAIN (TREE_TYPE (genop0));
2673 /* Drop zero minimum index if redundant. */
2674 if (integer_zerop (genop2)
2675 && (!domain_type
2676 || integer_zerop (TYPE_MIN_VALUE (domain_type))))
2677 genop2 = NULL_TREE;
2678 else
2680 genop2 = find_or_generate_expression (block, genop2, stmts);
2681 if (!genop2)
2682 return NULL_TREE;
2685 if (genop3)
2687 tree elmt_type = TREE_TYPE (TREE_TYPE (genop0));
2688 /* We can't always put a size in units of the element alignment
2689 here as the element alignment may not be visible. See
2690 PR43783. Simply drop the element size for constant
2691 sizes. */
2692 if ((TREE_CODE (genop3) == INTEGER_CST
2693 && TREE_CODE (TYPE_SIZE_UNIT (elmt_type)) == INTEGER_CST
2694 && wi::eq_p (wi::to_offset (TYPE_SIZE_UNIT (elmt_type)),
2695 (wi::to_offset (genop3) * vn_ref_op_align_unit (currop))))
2696 || (TREE_CODE (genop3) == EXACT_DIV_EXPR
2697 && TREE_CODE (TREE_OPERAND (genop3, 1)) == INTEGER_CST
2698 && operand_equal_p (TREE_OPERAND (genop3, 0), TYPE_SIZE_UNIT (elmt_type))
2699 && wi::eq_p (wi::to_offset (TREE_OPERAND (genop3, 1)),
2700 vn_ref_op_align_unit (currop))))
2701 genop3 = NULL_TREE;
2702 else
2704 genop3 = find_or_generate_expression (block, genop3, stmts);
2705 if (!genop3)
2706 return NULL_TREE;
2709 return build4 (currop->opcode, currop->type, genop0, genop1,
2710 genop2, genop3);
2712 case COMPONENT_REF:
2714 tree op0;
2715 tree op1;
2716 tree genop2 = currop->op1;
2717 op0 = create_component_ref_by_pieces_1 (block, ref, operand, stmts);
2718 if (!op0)
2719 return NULL_TREE;
2720 /* op1 should be a FIELD_DECL, which are represented by themselves. */
2721 op1 = currop->op0;
2722 if (genop2)
2724 genop2 = find_or_generate_expression (block, genop2, stmts);
2725 if (!genop2)
2726 return NULL_TREE;
2728 return build3 (COMPONENT_REF, TREE_TYPE (op1), op0, op1, genop2);
2731 case SSA_NAME:
2733 genop = find_or_generate_expression (block, currop->op0, stmts);
2734 return genop;
2736 case STRING_CST:
2737 case INTEGER_CST:
2738 case POLY_INT_CST:
2739 case COMPLEX_CST:
2740 case VECTOR_CST:
2741 case REAL_CST:
2742 case CONSTRUCTOR:
2743 case VAR_DECL:
2744 case PARM_DECL:
2745 case CONST_DECL:
2746 case RESULT_DECL:
2747 case FUNCTION_DECL:
2748 return currop->op0;
2750 default:
2751 gcc_unreachable ();
2755 /* For COMPONENT_REFs and ARRAY_REFs, we can't have any intermediates for the
2756 COMPONENT_REF or MEM_REF or ARRAY_REF portion, because we'd end up
2757 trying to rename aggregates into SSA form directly, which is a no-no.
2759 Thus, this routine doesn't create temporaries, it just builds a
2760 single access expression for the array, calling
2761 find_or_generate_expression to build the innermost pieces.
2763 This function is a subroutine of create_expression_by_pieces, and
2764 should not be called on its own unless you really know what you
2765 are doing. */
2767 static tree
2768 create_component_ref_by_pieces (basic_block block, vn_reference_t ref,
2769 gimple_seq *stmts)
2771 unsigned int op = 0;
2772 return create_component_ref_by_pieces_1 (block, ref, &op, stmts);
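/* For illustration, a hypothetical decomposition the walk above
   consumes: a reference like a.f[i_1] is stored as vn_reference_ops
   roughly in outside-in order

     { ARRAY_REF (op0 = i_1), COMPONENT_REF (op0 = f), VAR_DECL (op0 = a) }

   so create_component_ref_by_pieces_1 recurses to materialize the base
   first and rebuilds the tree on the way back out, calling
   find_or_generate_expression only for scalar pieces like i_1.  */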
2775 /* Find a simple leader for an expression, or generate one using
2776 create_expression_by_pieces from a NARY expression for the value.
2777 BLOCK is the basic_block we are looking for leaders in.
2778 OP is the tree expression to find a leader for or generate.
2779 Returns the leader or NULL_TREE on failure. */
2781 static tree
2782 find_or_generate_expression (basic_block block, tree op, gimple_seq *stmts)
2784 /* Constants are always leaders. */
2785 if (is_gimple_min_invariant (op))
2786 return op;
2788 gcc_assert (TREE_CODE (op) == SSA_NAME);
2789 vn_ssa_aux_t info = VN_INFO (op);
2790 unsigned int lookfor = info->value_id;
2791 if (value_id_constant_p (lookfor))
2792 return info->valnum;
2794 pre_expr leader = bitmap_find_leader (AVAIL_OUT (block), lookfor);
2795 if (leader)
2797 if (leader->kind == NAME)
2798 return PRE_EXPR_NAME (leader);
2799 else if (leader->kind == CONSTANT)
2800 return PRE_EXPR_CONSTANT (leader);
2802 /* Defer. */
2803 return NULL_TREE;
2805 gcc_assert (!value_id_constant_p (lookfor));
2807 /* It must be a complex expression, so generate it recursively. Note
2808 that this is only necessary to handle gcc.dg/tree-ssa/ssa-pre28.c
2809 where the insert algorithm fails to insert a required expression. */
2810 bitmap exprset = value_expressions[lookfor];
2811 bitmap_iterator bi;
2812 unsigned int i;
2813 EXECUTE_IF_SET_IN_BITMAP (exprset, 0, i, bi)
2815 pre_expr temp = expression_for_id (i);
2816 /* We cannot insert random REFERENCE expressions at arbitrary
2817 places. We can insert NARYs, which eventually re-materialize
2818 their operand values. */
2819 if (temp->kind == NARY)
2820 return create_expression_by_pieces (block, temp, stmts,
2821 TREE_TYPE (op));
2824 /* Defer. */
2825 return NULL_TREE;
2828 /* Create an expression in pieces, so that we can handle very complex
2829 expressions that may be ANTIC, but not necessarily GIMPLE.
2830 BLOCK is the basic block the expression will be inserted into,
2831 EXPR is the expression to insert (in value form)
2832 STMTS is a statement list to append the necessary insertions into.
2834 This function will die if we hit some value that shouldn't be
2835 ANTIC but is (IE there is no leader for it, or its components).
2836 The function returns NULL_TREE in case a different antic expression
2837 has to be inserted first.
2838 This function may also generate expressions that are themselves
2839 partially or fully redundant. Those that are will be either made
2840 fully redundant during the next iteration of insert (for partially
2841 redundant ones), or eliminated by eliminate (for fully redundant
2842 ones). */
2844 static tree
2845 create_expression_by_pieces (basic_block block, pre_expr expr,
2846 gimple_seq *stmts, tree type)
2848 tree name;
2849 tree folded;
2850 gimple_seq forced_stmts = NULL;
2851 unsigned int value_id;
2852 gimple_stmt_iterator gsi;
2853 tree exprtype = type ? type : get_expr_type (expr);
2854 pre_expr nameexpr;
2855 gassign *newstmt;
2857 switch (expr->kind)
2859 /* We may hit the NAME/CONSTANT case if we have to convert types
2860 that value numbering saw through. */
2861 case NAME:
2862 folded = PRE_EXPR_NAME (expr);
2863 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (folded))
2864 return NULL_TREE;
2865 if (useless_type_conversion_p (exprtype, TREE_TYPE (folded)))
2866 return folded;
2867 break;
2868 case CONSTANT:
2870 folded = PRE_EXPR_CONSTANT (expr);
2871 tree tem = fold_convert (exprtype, folded);
2872 if (is_gimple_min_invariant (tem))
2873 return tem;
2874 break;
2876 case REFERENCE:
2877 if (PRE_EXPR_REFERENCE (expr)->operands[0].opcode == CALL_EXPR)
2879 vn_reference_t ref = PRE_EXPR_REFERENCE (expr);
2880 unsigned int operand = 1;
2881 vn_reference_op_t currop = &ref->operands[0];
2882 tree sc = NULL_TREE;
2883 tree fn = NULL_TREE;
2884 if (currop->op0)
2886 fn = find_or_generate_expression (block, currop->op0, stmts);
2887 if (!fn)
2888 return NULL_TREE;
2890 if (currop->op1)
2892 sc = find_or_generate_expression (block, currop->op1, stmts);
2893 if (!sc)
2894 return NULL_TREE;
2896 auto_vec<tree> args (ref->operands.length () - 1);
2897 while (operand < ref->operands.length ())
2899 tree arg = create_component_ref_by_pieces_1 (block, ref,
2900 &operand, stmts);
2901 if (!arg)
2902 return NULL_TREE;
2903 args.quick_push (arg);
2905 gcall *call;
2906 if (currop->op0)
2908 call = gimple_build_call_vec (fn, args);
2909 gimple_call_set_fntype (call, currop->type);
2911 else
2912 call = gimple_build_call_internal_vec ((internal_fn)currop->clique,
2913 args);
2914 gimple_set_location (call, expr->loc);
2915 if (sc)
2916 gimple_call_set_chain (call, sc);
2917 tree forcedname = make_ssa_name (ref->type);
2918 gimple_call_set_lhs (call, forcedname);
2919 /* There's no CCP pass after PRE which would re-compute alignment
2920 information, so make sure we re-materialize it here. */
2921 if (gimple_call_builtin_p (call, BUILT_IN_ASSUME_ALIGNED)
2922 && args.length () - 2 <= 1
2923 && tree_fits_uhwi_p (args[1])
2924 && (args.length () != 3 || tree_fits_uhwi_p (args[2])))
2926 unsigned HOST_WIDE_INT halign = tree_to_uhwi (args[1]);
2927 unsigned HOST_WIDE_INT hmisalign
2928 = args.length () == 3 ? tree_to_uhwi (args[2]) : 0;
2929 if ((halign & (halign - 1)) == 0
2930 && (hmisalign & ~(halign - 1)) == 0
2931 && (unsigned int)halign != 0)
2932 set_ptr_info_alignment (get_ptr_info (forcedname),
2933 halign, hmisalign);
2935 gimple_set_vuse (call, BB_LIVE_VOP_ON_EXIT (block));
2936 gimple_seq_add_stmt_without_update (&forced_stmts, call);
2937 folded = forcedname;
2939 else
2941 folded = create_component_ref_by_pieces (block,
2942 PRE_EXPR_REFERENCE (expr),
2943 stmts);
2944 if (!folded)
2945 return NULL_TREE;
2946 name = make_temp_ssa_name (exprtype, NULL, "pretmp");
2947 newstmt = gimple_build_assign (name, folded);
2948 gimple_set_location (newstmt, expr->loc);
2949 gimple_seq_add_stmt_without_update (&forced_stmts, newstmt);
2950 gimple_set_vuse (newstmt, BB_LIVE_VOP_ON_EXIT (block));
2951 folded = name;
2953 break;
2954 case NARY:
2956 vn_nary_op_t nary = PRE_EXPR_NARY (expr);
2957 tree *genop = XALLOCAVEC (tree, nary->length);
2958 unsigned i;
2959 for (i = 0; i < nary->length; ++i)
2961 genop[i] = find_or_generate_expression (block, nary->op[i], stmts);
2962 if (!genop[i])
2963 return NULL_TREE;
2964 /* Ensure genop[] is properly typed for POINTER_PLUS_EXPR. It
2965 may have conversions stripped. */
2966 if (nary->opcode == POINTER_PLUS_EXPR)
2968 if (i == 0)
2969 genop[i] = gimple_convert (&forced_stmts,
2970 nary->type, genop[i]);
2971 else if (i == 1)
2972 genop[i] = gimple_convert (&forced_stmts,
2973 sizetype, genop[i]);
2975 else
2976 genop[i] = gimple_convert (&forced_stmts,
2977 TREE_TYPE (nary->op[i]), genop[i]);
2979 if (nary->opcode == CONSTRUCTOR)
2981 vec<constructor_elt, va_gc> *elts = NULL;
2982 for (i = 0; i < nary->length; ++i)
2983 CONSTRUCTOR_APPEND_ELT (elts, NULL_TREE, genop[i]);
2984 folded = build_constructor (nary->type, elts);
2985 name = make_temp_ssa_name (exprtype, NULL, "pretmp");
2986 newstmt = gimple_build_assign (name, folded);
2987 gimple_set_location (newstmt, expr->loc);
2988 gimple_seq_add_stmt_without_update (&forced_stmts, newstmt);
2989 folded = name;
2991 else
2993 switch (nary->length)
2995 case 1:
2996 folded = gimple_build (&forced_stmts, expr->loc,
2997 nary->opcode, nary->type, genop[0]);
2998 break;
2999 case 2:
3000 folded = gimple_build (&forced_stmts, expr->loc, nary->opcode,
3001 nary->type, genop[0], genop[1]);
3002 break;
3003 case 3:
3004 folded = gimple_build (&forced_stmts, expr->loc, nary->opcode,
3005 nary->type, genop[0], genop[1],
3006 genop[2]);
3007 break;
3008 default:
3009 gcc_unreachable ();
3013 break;
3014 default:
3015 gcc_unreachable ();
3018 folded = gimple_convert (&forced_stmts, exprtype, folded);
3020 /* If there is nothing to insert, return the simplified result. */
3021 if (gimple_seq_empty_p (forced_stmts))
3022 return folded;
3023 /* If we simplified to a constant, return it and discard any stmts
3024 we may have built. */
3025 if (is_gimple_min_invariant (folded))
3027 gimple_seq_discard (forced_stmts);
3028 return folded;
3030 /* Likewise if we simplified to something not queued for insertion. */
3031 bool found = false;
3032 gsi = gsi_last (forced_stmts);
3033 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
3035 gimple *stmt = gsi_stmt (gsi);
3036 tree forcedname = gimple_get_lhs (stmt);
3037 if (forcedname == folded)
3039 found = true;
3040 break;
3043 if (! found)
3045 gimple_seq_discard (forced_stmts);
3046 return folded;
3048 gcc_assert (TREE_CODE (folded) == SSA_NAME);
3050 /* If we have any intermediate expressions to the value sets, add them
3051 to the value sets and chain them in the instruction stream. */
3052 if (forced_stmts)
3054 gsi = gsi_start (forced_stmts);
3055 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3057 gimple *stmt = gsi_stmt (gsi);
3058 tree forcedname = gimple_get_lhs (stmt);
3059 pre_expr nameexpr;
3061 if (forcedname != folded)
3063 vn_ssa_aux_t vn_info = VN_INFO (forcedname);
3064 vn_info->valnum = forcedname;
3065 vn_info->value_id = get_next_value_id ();
3066 nameexpr = get_or_alloc_expr_for_name (forcedname);
3067 add_to_value (vn_info->value_id, nameexpr);
3068 if (NEW_SETS (block))
3069 bitmap_value_replace_in_set (NEW_SETS (block), nameexpr);
3070 bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr);
3073 bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (forcedname));
3075 gimple_seq_add_seq (stmts, forced_stmts);
3078 name = folded;
3080 /* Fold the last statement. */
3081 gsi = gsi_last (*stmts);
3082 if (fold_stmt_inplace (&gsi))
3083 update_stmt (gsi_stmt (gsi));
3085 /* Add a value number to the temporary.
3086 The value may already exist in either NEW_SETS, or AVAIL_OUT, because
3087 we are creating the expression by pieces, and this particular piece of
3088 the expression may have been represented. There is no harm in replacing
3089 here. */
3090 value_id = get_expr_value_id (expr);
3091 vn_ssa_aux_t vn_info = VN_INFO (name);
3092 vn_info->value_id = value_id;
3093 vn_info->valnum = vn_valnum_from_value_id (value_id);
3094 if (vn_info->valnum == NULL_TREE)
3095 vn_info->valnum = name;
3096 gcc_assert (vn_info->valnum != NULL_TREE);
3097 nameexpr = get_or_alloc_expr_for_name (name);
3098 add_to_value (value_id, nameexpr);
3099 if (NEW_SETS (block))
3100 bitmap_value_replace_in_set (NEW_SETS (block), nameexpr);
3101 bitmap_value_replace_in_set (AVAIL_OUT (block), nameexpr);
3103 pre_stats.insertions++;
3104 if (dump_file && (dump_flags & TDF_DETAILS))
3106 fprintf (dump_file, "Inserted ");
3107 print_gimple_stmt (dump_file, gsi_stmt (gsi_last (*stmts)), 0);
3108 fprintf (dump_file, " in predecessor %d (%04d)\n",
3109 block->index, value_id);
3112 return name;
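/* For illustration, materializing the NARY {plus_expr, a_1, b_2} via
   the function above appends something like

     pretmp_9 = a_1 + b_2;

   to STMTS (name hypothetical), and pretmp_9 is given the value-id of
   the original expression so AVAIL_OUT and NEW_SETS treat the old and
   the new computation as the same value.  */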
3116 /* Insert the to-be-made-available values of expression EXPRNUM for each
3117 predecessor, stored in AVAIL, into the predecessors of BLOCK, and
3118 merge the result with a phi node, given the same value number as
3119 NODE. Return true if we have inserted new stuff. */
3121 static bool
3122 insert_into_preds_of_block (basic_block block, unsigned int exprnum,
3123 vec<pre_expr> &avail)
3125 pre_expr expr = expression_for_id (exprnum);
3126 pre_expr newphi;
3127 unsigned int val = get_expr_value_id (expr);
3128 edge pred;
3129 bool insertions = false;
3130 bool nophi = false;
3131 basic_block bprime;
3132 pre_expr eprime;
3133 edge_iterator ei;
3134 tree type = get_expr_type (expr);
3135 tree temp;
3136 gphi *phi;
3138 /* Make sure we aren't creating an induction variable. */
3139 if (bb_loop_depth (block) > 0 && EDGE_COUNT (block->preds) == 2)
3141 bool firstinsideloop = false;
3142 bool secondinsideloop = false;
3143 firstinsideloop = flow_bb_inside_loop_p (block->loop_father,
3144 EDGE_PRED (block, 0)->src);
3145 secondinsideloop = flow_bb_inside_loop_p (block->loop_father,
3146 EDGE_PRED (block, 1)->src);
3147 /* Induction variables only have one edge inside the loop. */
3148 if ((firstinsideloop ^ secondinsideloop)
3149 && expr->kind != REFERENCE)
3151 if (dump_file && (dump_flags & TDF_DETAILS))
3152 fprintf (dump_file, "Skipping insertion of phi for partial "
3153 "redundancy: Looks like an induction variable\n");
3154 nophi = true;
3158 /* Make the necessary insertions. */
3159 FOR_EACH_EDGE (pred, ei, block->preds)
3161 /* When we are not inserting a PHI node do not bother inserting
3162 into places that do not dominate the anticipated computations. */
3163 if (nophi && !dominated_by_p (CDI_DOMINATORS, block, pred->src))
3164 continue;
3165 gimple_seq stmts = NULL;
3166 tree builtexpr;
3167 bprime = pred->src;
3168 eprime = avail[pred->dest_idx];
3169 builtexpr = create_expression_by_pieces (bprime, eprime,
3170 &stmts, type);
3171 gcc_assert (!(pred->flags & EDGE_ABNORMAL));
3172 if (!gimple_seq_empty_p (stmts))
3174 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pred, stmts);
3175 gcc_assert (! new_bb);
3176 insertions = true;
3178 if (!builtexpr)
3180 /* We cannot insert a PHI node if we failed to insert
3181 on one edge. */
3182 nophi = true;
3183 continue;
3185 if (is_gimple_min_invariant (builtexpr))
3186 avail[pred->dest_idx] = get_or_alloc_expr_for_constant (builtexpr);
3187 else
3188 avail[pred->dest_idx] = get_or_alloc_expr_for_name (builtexpr);
3190 /* If we didn't want a phi node, and we made insertions, we still have
3191 inserted new stuff, and thus return true. If we didn't want a phi node,
3192 and didn't make insertions, we haven't added anything new, so return
3193 false. */
3194 if (nophi && insertions)
3195 return true;
3196 else if (nophi && !insertions)
3197 return false;
3199 /* Now build a phi for the new variable. */
3200 temp = make_temp_ssa_name (type, NULL, "prephitmp");
3201 phi = create_phi_node (temp, block);
3203 vn_ssa_aux_t vn_info = VN_INFO (temp);
3204 vn_info->value_id = val;
3205 vn_info->valnum = vn_valnum_from_value_id (val);
3206 if (vn_info->valnum == NULL_TREE)
3207 vn_info->valnum = temp;
3208 bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (temp));
3209 FOR_EACH_EDGE (pred, ei, block->preds)
3211 pre_expr ae = avail[pred->dest_idx];
3212 gcc_assert (get_expr_type (ae) == type
3213 || useless_type_conversion_p (type, get_expr_type (ae)));
3214 if (ae->kind == CONSTANT)
3215 add_phi_arg (phi, unshare_expr (PRE_EXPR_CONSTANT (ae)),
3216 pred, UNKNOWN_LOCATION);
3217 else
3218 add_phi_arg (phi, PRE_EXPR_NAME (ae), pred, UNKNOWN_LOCATION);
3221 newphi = get_or_alloc_expr_for_name (temp);
3222 add_to_value (val, newphi);
3224 /* The value should *not* exist in PHI_GEN, or else we wouldn't be doing
3225 this insertion, since we test for the existence of this value in PHI_GEN
3226 before proceeding with the partial redundancy checks in insert_aux.
3228 The value may exist in AVAIL_OUT, in particular, it could be represented
3229 by the expression we are trying to eliminate, in which case we want the
3230 replacement to occur. If it does not exist in AVAIL_OUT, we want it
3231 inserted there.
3233 Similarly to the PHI_GEN case, the value should not exist in NEW_SETS of
3234 this block, because if it did, it would have existed in our dominator's
3235 AVAIL_OUT, and would have been skipped due to the full redundancy check.
3238 bitmap_insert_into_set (PHI_GEN (block), newphi);
3239 bitmap_value_replace_in_set (AVAIL_OUT (block),
3240 newphi);
3241 if (NEW_SETS (block))
3242 bitmap_insert_into_set (NEW_SETS (block), newphi);
3244 /* If we insert a PHI node for a conversion of another PHI node
3245 in the same basic-block try to preserve range information.
3246 This is important so that followup loop passes receive optimal
3247 number of iteration analysis results. See PR61743. */
3248 if (expr->kind == NARY
3249 && CONVERT_EXPR_CODE_P (expr->u.nary->opcode)
3250 && TREE_CODE (expr->u.nary->op[0]) == SSA_NAME
3251 && gimple_bb (SSA_NAME_DEF_STMT (expr->u.nary->op[0])) == block
3252 && INTEGRAL_TYPE_P (type)
3253 && INTEGRAL_TYPE_P (TREE_TYPE (expr->u.nary->op[0]))
3254 && (TYPE_PRECISION (type)
3255 >= TYPE_PRECISION (TREE_TYPE (expr->u.nary->op[0])))
3256 && SSA_NAME_RANGE_INFO (expr->u.nary->op[0]))
3258 int_range_max r;
3259 if (get_range_query (cfun)->range_of_expr (r, expr->u.nary->op[0])
3260 && !r.undefined_p ()
3261 && !r.varying_p ()
3262 && !wi::neg_p (r.lower_bound (), SIGNED)
3263 && !wi::neg_p (r.upper_bound (), SIGNED))
3265 /* Just handle extension and sign-changes of all-positive ranges. */
3266 range_cast (r, type);
3267 set_range_info (temp, r);
3271 if (dump_file && (dump_flags & TDF_DETAILS))
3273 fprintf (dump_file, "Created phi ");
3274 print_gimple_stmt (dump_file, phi, 0);
3275 fprintf (dump_file, " in block %d (%04d)\n", block->index, val);
3277 pre_stats.phis++;
3278 return true;
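/* For illustration, assume a + b is available along only one of the
   two predecessor edges.  The function above inserts the missing
   computation on the other edge and merges the results with

     prephitmp_7 = PHI <pretmp_9 (E1), t_5 (E2)>

   (names hypothetical), making the downstream computation of a + b
   fully redundant so elimination can replace it by prephitmp_7.  */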
3283 /* Perform insertion of partially redundant or hoistable values.
3284 For BLOCK, do the following:
3285 1. Propagate the NEW_SETS of the dominator into the current block.
3286 If the block has multiple predecessors,
3287 2a. Iterate over the ANTIC expressions for the block to see if
3288 any of them are partially redundant.
3289 2b. If so, insert them into the necessary predecessors to make
3290 the expression fully redundant.
3291 2c. Insert a new PHI merging the values of the predecessors.
3292 2d. Insert the new PHI, and the new expressions, into the
3293 NEW_SETS set.
3294 If the block has multiple successors,
3295 3a. Iterate over the ANTIC values for the block to see if
3296 any of them are good candidates for hoisting.
3297 3b. If so, insert expressions computing the values in BLOCK,
3298 and add the new expressions into the NEW_SETS set.
3299 4. Recursively call ourselves on the dominator children of BLOCK.
3301 Steps 1, 2a, and 4 are done by insert_aux. 2b, 2c and 2d are done by
3302 do_pre_regular_insertion and do_pre_partial_partial_insertion. 3a and 3b are
3303 done in do_hoist_insertion.
3306 static bool
3307 do_pre_regular_insertion (basic_block block, basic_block dom,
3308 vec<pre_expr> exprs)
3310 bool new_stuff = false;
3311 pre_expr expr;
3312 auto_vec<pre_expr, 2> avail;
3313 int i;
3315 avail.safe_grow (EDGE_COUNT (block->preds), true);
3317 FOR_EACH_VEC_ELT (exprs, i, expr)
3319 if (expr->kind == NARY
3320 || expr->kind == REFERENCE)
3322 unsigned int val;
3323 bool by_some = false;
3324 bool cant_insert = false;
3325 bool all_same = true;
3326 unsigned num_inserts = 0;
3327 unsigned num_const = 0;
3328 pre_expr first_s = NULL;
3329 edge pred;
3330 basic_block bprime;
3331 pre_expr eprime = NULL;
3332 edge_iterator ei;
3333 pre_expr edoubleprime = NULL;
3334 bool do_insertion = false;
3336 val = get_expr_value_id (expr);
3337 if (bitmap_set_contains_value (PHI_GEN (block), val))
3338 continue;
3339 if (bitmap_set_contains_value (AVAIL_OUT (dom), val))
3341 if (dump_file && (dump_flags & TDF_DETAILS))
3343 fprintf (dump_file, "Found fully redundant value: ");
3344 print_pre_expr (dump_file, expr);
3345 fprintf (dump_file, "\n");
3347 continue;
3350 FOR_EACH_EDGE (pred, ei, block->preds)
3352 unsigned int vprime;
3354 /* We should never run insertion for the exit block
3355 and so not come across fake pred edges. */
3356 gcc_assert (!(pred->flags & EDGE_FAKE));
3357 bprime = pred->src;
3358 /* We are looking at ANTIC_OUT of bprime. */
3359 eprime = phi_translate (NULL, expr, ANTIC_IN (block), NULL, pred);
3361 /* eprime will generally only be NULL if the
3362 value of the expression, translated
3363 through the PHI for this predecessor, is
3364 undefined. If that is the case, we can't
3365 make the expression fully redundant,
3366 because its value is undefined along a
3367 predecessor path. We can thus break out
3368 early because it doesn't matter what the
3369 rest of the results are. */
3370 if (eprime == NULL)
3372 avail[pred->dest_idx] = NULL;
3373 cant_insert = true;
3374 break;
3377 vprime = get_expr_value_id (eprime);
3378 edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime),
3379 vprime);
3380 if (edoubleprime == NULL)
3382 avail[pred->dest_idx] = eprime;
3383 all_same = false;
3384 num_inserts++;
3386 else
3388 avail[pred->dest_idx] = edoubleprime;
3389 by_some = true;
3390 if (edoubleprime->kind == CONSTANT)
3391 num_const++;
3392 /* We want to perform insertions to remove a redundancy on
3393 a path in the CFG we want to optimize for speed. */
3394 if (optimize_edge_for_speed_p (pred))
3395 do_insertion = true;
3396 if (first_s == NULL)
3397 first_s = edoubleprime;
3398 else if (!pre_expr_d::equal (first_s, edoubleprime))
3399 all_same = false;
3402 /* If we can insert it, it's not the same value
3403 already existing along every predecessor, and
3404 it's defined by some predecessor, it is
3405 partially redundant. */
3406 if (!cant_insert && !all_same && by_some)
3408 /* If the expression is redundant on all edges and we need
3409 to insert at most one copy from a constant, do the PHI
3410 insertion even when we are not optimizing any path for
3411 speed. */
3412 if (num_inserts == 0 && num_const <= 1)
3413 do_insertion = true;
3414 if (!do_insertion)
3416 if (dump_file && (dump_flags & TDF_DETAILS))
3418 fprintf (dump_file, "Skipping partial redundancy for "
3419 "expression ");
3420 print_pre_expr (dump_file, expr);
3421 fprintf (dump_file, " (%04d), no redundancy on to be "
3422 "optimized for speed edge\n", val);
3425 else if (dbg_cnt (treepre_insert))
3427 if (dump_file && (dump_flags & TDF_DETAILS))
3429 fprintf (dump_file, "Found partial redundancy for "
3430 "expression ");
3431 print_pre_expr (dump_file, expr);
3432 fprintf (dump_file, " (%04d)\n",
3433 get_expr_value_id (expr));
3435 if (insert_into_preds_of_block (block,
3436 get_expression_id (expr),
3437 avail))
3438 new_stuff = true;
3441 /* If all edges produce the same value and that value is
3442 an invariant, then the PHI has the same value on all
3443 edges. Note this. */
3444 else if (!cant_insert
3445 && all_same
3446 && (edoubleprime->kind != NAME
3447 || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI
3448 (PRE_EXPR_NAME (edoubleprime))))
3450 gcc_assert (edoubleprime->kind == CONSTANT
3451 || edoubleprime->kind == NAME);
3453 tree temp = make_temp_ssa_name (get_expr_type (expr),
3454 NULL, "pretmp");
3455 gassign *assign
3456 = gimple_build_assign (temp,
3457 edoubleprime->kind == CONSTANT ?
3458 PRE_EXPR_CONSTANT (edoubleprime) :
3459 PRE_EXPR_NAME (edoubleprime));
3460 gimple_stmt_iterator gsi = gsi_after_labels (block);
3461 gsi_insert_before (&gsi, assign, GSI_NEW_STMT);
3463 vn_ssa_aux_t vn_info = VN_INFO (temp);
3464 vn_info->value_id = val;
3465 vn_info->valnum = vn_valnum_from_value_id (val);
3466 if (vn_info->valnum == NULL_TREE)
3467 vn_info->valnum = temp;
3468 bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (temp));
3469 pre_expr newe = get_or_alloc_expr_for_name (temp);
3470 add_to_value (val, newe);
3471 bitmap_value_replace_in_set (AVAIL_OUT (block), newe);
3472 bitmap_insert_into_set (NEW_SETS (block), newe);
3473 bitmap_insert_into_set (PHI_GEN (block), newe);
3478 return new_stuff;
3482 /* Perform insertion for partially anticipatable expressions. There
3483 is only one case in which we perform insertion for these: when
3484 the expression is partially anticipatable and fully available.
3485 In this case, we know that putting it earlier will enable us to
3486 remove the later computation. */
3488 static bool
3489 do_pre_partial_partial_insertion (basic_block block, basic_block dom,
3490 vec<pre_expr> exprs)
3492 bool new_stuff = false;
3493 pre_expr expr;
3494 auto_vec<pre_expr, 2> avail;
3495 int i;
3497 avail.safe_grow (EDGE_COUNT (block->preds), true);
3499 FOR_EACH_VEC_ELT (exprs, i, expr)
3501 if (expr->kind == NARY
3502 || expr->kind == REFERENCE)
3504 unsigned int val;
3505 bool by_all = true;
3506 bool cant_insert = false;
3507 edge pred;
3508 basic_block bprime;
3509 pre_expr eprime = NULL;
3510 edge_iterator ei;
3512 val = get_expr_value_id (expr);
3513 if (bitmap_set_contains_value (PHI_GEN (block), val))
3514 continue;
3515 if (bitmap_set_contains_value (AVAIL_OUT (dom), val))
3516 continue;
3518 FOR_EACH_EDGE (pred, ei, block->preds)
3520 unsigned int vprime;
3521 pre_expr edoubleprime;
3523 /* We should never run insertion for the exit block
3524 and so not come across fake pred edges. */
3525 gcc_assert (!(pred->flags & EDGE_FAKE));
3526 bprime = pred->src;
3527 eprime = phi_translate (NULL, expr, ANTIC_IN (block),
3528 PA_IN (block), pred);
3530 /* eprime will generally only be NULL if the
3531 value of the expression, translated
3532 through the PHI for this predecessor, is
3533 undefined. If that is the case, we can't
3534 make the expression fully redundant,
3535 because its value is undefined along a
3536 predecessor path. We can thus break out
3537 early because it doesn't matter what the
3538 rest of the results are. */
3539 if (eprime == NULL)
3541 avail[pred->dest_idx] = NULL;
3542 cant_insert = true;
3543 break;
3546 vprime = get_expr_value_id (eprime);
3547 edoubleprime = bitmap_find_leader (AVAIL_OUT (bprime), vprime);
3548 avail[pred->dest_idx] = edoubleprime;
3549 if (edoubleprime == NULL)
3551 by_all = false;
3552 break;
3556 /* If we can insert it and the value is available
3557 along every predecessor, the expression is
3558 partially anticipatable and fully available,
3559 so inserting it earlier pays off. */
3560 if (!cant_insert && by_all)
3562 edge succ;
3563 bool do_insertion = false;
3565 /* Insert only if we can remove a later expression on a path
3566 that we want to optimize for speed.
3567 The phi node that we will be inserting in BLOCK is not free,
3568 and inserting it for the sake of !optimize_for_speed successor
3569 may cause regressions on the speed path. */
3570 FOR_EACH_EDGE (succ, ei, block->succs)
3572 if (bitmap_set_contains_value (PA_IN (succ->dest), val)
3573 || bitmap_set_contains_value (ANTIC_IN (succ->dest), val))
3575 if (optimize_edge_for_speed_p (succ))
3576 do_insertion = true;
3580 if (!do_insertion)
3582 if (dump_file && (dump_flags & TDF_DETAILS))
3584 fprintf (dump_file, "Skipping partial partial redundancy "
3585 "for expression ");
3586 print_pre_expr (dump_file, expr);
3587 fprintf (dump_file, " (%04d), not (partially) anticipated "
3588 "on any to be optimized for speed edges\n", val);
3591 else if (dbg_cnt (treepre_insert))
3593 pre_stats.pa_insert++;
3594 if (dump_file && (dump_flags & TDF_DETAILS))
3596 fprintf (dump_file, "Found partial partial redundancy "
3597 "for expression ");
3598 print_pre_expr (dump_file, expr);
3599 fprintf (dump_file, " (%04d)\n",
3600 get_expr_value_id (expr));
3602 if (insert_into_preds_of_block (block,
3603 get_expression_id (expr),
3604 avail))
3605 new_stuff = true;
3611 return new_stuff;
3614 /* Insert expressions in BLOCK to compute hoistable values up.
3615 Return TRUE if something was inserted, otherwise return FALSE.
3616 The caller has to make sure that BLOCK has at least two successors. */
3618 static bool
3619 do_hoist_insertion (basic_block block)
3621 edge e;
3622 edge_iterator ei;
3623 bool new_stuff = false;
3624 unsigned i;
3625 gimple_stmt_iterator last;
3627 /* At least two successors, or else... */
3628 gcc_assert (EDGE_COUNT (block->succs) >= 2);
3630 /* Check that all successors of BLOCK are dominated by block.
3631 We could use dominated_by_p() for this, but actually there is a much
3632 quicker check: any successor that is dominated by BLOCK can't have
3633 more than one predecessor edge. */
3634 FOR_EACH_EDGE (e, ei, block->succs)
3635 if (! single_pred_p (e->dest))
3636 return false;
3638 /* Determine the insertion point. If we would have to insert before
3639 the last stmt and cannot do so safely, bail out. */
3640 last = gsi_last_bb (block);
3641 if (!gsi_end_p (last)
3642 && !is_ctrl_stmt (gsi_stmt (last))
3643 && stmt_ends_bb_p (gsi_stmt (last)))
3644 return false;
3646 /* We have multiple successors, compute ANTIC_OUT by taking the intersection
3647 of all of ANTIC_IN translating through PHI nodes. Track the union
3648 of the expression sets so we can pick a representative that is
3649 fully generatable out of hoistable expressions. */
3650 bitmap_set_t ANTIC_OUT = bitmap_set_new ();
3651 bool first = true;
3652 FOR_EACH_EDGE (e, ei, block->succs)
3654 if (first)
3656 phi_translate_set (ANTIC_OUT, ANTIC_IN (e->dest), e);
3657 first = false;
3659 else if (!gimple_seq_empty_p (phi_nodes (e->dest)))
3661 bitmap_set_t tmp = bitmap_set_new ();
3662 phi_translate_set (tmp, ANTIC_IN (e->dest), e);
3663 bitmap_and_into (&ANTIC_OUT->values, &tmp->values);
3664 bitmap_ior_into (&ANTIC_OUT->expressions, &tmp->expressions);
3665 bitmap_set_free (tmp);
3667 else
3669 bitmap_and_into (&ANTIC_OUT->values, &ANTIC_IN (e->dest)->values);
3670 bitmap_ior_into (&ANTIC_OUT->expressions,
3671 &ANTIC_IN (e->dest)->expressions);
3675 /* Compute the set of hoistable expressions from ANTIC_OUT. First compute
3676 hoistable values. */
3677 bitmap_set hoistable_set;
3679 /* A hoistable value must be in ANTIC_OUT(block)
3680 but not in AVAIL_OUT(BLOCK). */
3681 bitmap_initialize (&hoistable_set.values, &grand_bitmap_obstack);
3682 bitmap_and_compl (&hoistable_set.values,
3683 &ANTIC_OUT->values, &AVAIL_OUT (block)->values);
3685 /* Short-cut for a common case: hoistable_set is empty. */
3686 if (bitmap_empty_p (&hoistable_set.values))
3688 bitmap_set_free (ANTIC_OUT);
3689 return false;
3692 /* Compute which of the hoistable values is in AVAIL_OUT of
3693 at least one of the successors of BLOCK. */
3694 bitmap_head availout_in_some;
3695 bitmap_initialize (&availout_in_some, &grand_bitmap_obstack);
3696 FOR_EACH_EDGE (e, ei, block->succs)
3697 /* Do not consider expressions solely because of their availability
3698 on loop exits. They'd be ANTIC-IN throughout the whole loop
3699 and thus effectively hoisted across loops by combination of
3700 PRE and hoisting. */
3701 if (! loop_exit_edge_p (block->loop_father, e))
3702 bitmap_ior_and_into (&availout_in_some, &hoistable_set.values,
3703 &AVAIL_OUT (e->dest)->values);
3704 bitmap_clear (&hoistable_set.values);
3706 /* Short-cut for a common case: availout_in_some is empty. */
3707 if (bitmap_empty_p (&availout_in_some))
3709 bitmap_set_free (ANTIC_OUT);
3710 return false;
3713 /* Hack hoistable_set in-place so we can use sorted_array_from_bitmap_set. */
3714 bitmap_move (&hoistable_set.values, &availout_in_some);
3715 hoistable_set.expressions = ANTIC_OUT->expressions;
3717 /* Now finally construct the topologically ordered expression set. */
3718 vec<pre_expr> exprs = sorted_array_from_bitmap_set (&hoistable_set);
3720 /* If there are candidate values for hoisting, insert expressions
3721 strategically to make the hoistable expressions fully redundant. */
3722 pre_expr expr;
3723 FOR_EACH_VEC_ELT (exprs, i, expr)
3725 /* While we try to sort expressions topologically above, the
3726 sorting doesn't work out perfectly. Catch expressions we
3727 already inserted. */
3728 unsigned int value_id = get_expr_value_id (expr);
3729 if (bitmap_set_contains_value (AVAIL_OUT (block), value_id))
3731 if (dump_file && (dump_flags & TDF_DETAILS))
3733 fprintf (dump_file,
3734 "Already inserted expression for ");
3735 print_pre_expr (dump_file, expr);
3736 fprintf (dump_file, " (%04d)\n", value_id);
3738 continue;
3741 /* If we end up with a punned expression representation and this
3742 happens to be a float-typed one, give up - we can't know for
3743 sure whether all paths perform the floating-point load we are
3744 about to insert, and on some targets this can cause correctness
3745 issues. See PR88240. */
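/* (Hypothetical illustration of the punned case: if a float load was
   value-numbered the same as an integer load of the identical bits,
   the REFERENCE kept for the value may be the float-typed variant;
   materializing that float load on a path that originally performed
   only the integer access can misbehave on targets where FP loads
   can trap on certain bit patterns.)  */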
3746 if (expr->kind == REFERENCE
3747 && PRE_EXPR_REFERENCE (expr)->punned
3748 && FLOAT_TYPE_P (get_expr_type (expr)))
3749 continue;
3751 /* Only hoist if the full expression is available for hoisting.
3752 This avoids hoisting values that are not common, for
3753 example ones that evaluate an expression that's not valid
3754 to evaluate unconditionally (PR112310). */
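/* (A hedged sketch of the hazard: without this check we could insert
   a computation such as 'x / y' whose operands are not all available,
   forcing its evaluation on paths where the original program, e.g. by
   guarding against y == 0, never performed it.)  */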
3755 if (!valid_in_sets (&hoistable_set, AVAIL_OUT (block), expr))
3756 continue;
3758 /* OK, we should hoist this value. Perform the transformation. */
3759 pre_stats.hoist_insert++;
3760 if (dump_file && (dump_flags & TDF_DETAILS))
3762 fprintf (dump_file,
3763 "Inserting expression in block %d for code hoisting: ",
3764 block->index);
3765 print_pre_expr (dump_file, expr);
3766 fprintf (dump_file, " (%04d)\n", value_id);
3769 gimple_seq stmts = NULL;
3770 tree res = create_expression_by_pieces (block, expr, &stmts,
3771 get_expr_type (expr));
3773 /* Do not return true if expression creation ultimately
3774 did not insert any statements. */
3775 if (gimple_seq_empty_p (stmts))
3776 res = NULL_TREE;
3777 else
3779 if (gsi_end_p (last) || is_ctrl_stmt (gsi_stmt (last)))
3780 gsi_insert_seq_before (&last, stmts, GSI_SAME_STMT);
3781 else
3782 gsi_insert_seq_after (&last, stmts, GSI_NEW_STMT);
3785 /* Make sure not to return true if expression creation ultimately
3786 failed, but also make sure any stmts produced were inserted, as they
3787 are tracked in inserted_exprs. */
3788 if (! res)
3789 continue;
3791 new_stuff = true;
3794 exprs.release ();
3795 bitmap_clear (&hoistable_set.values);
3796 bitmap_set_free (ANTIC_OUT);
3798 return new_stuff;
3801 /* Perform insertion of partially redundant and hoistable values. */
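/* The classic PRE situation (a sketch, not a testcase from the tree):

     if (c)
       x = a + b;   // 'a + b' available on this edge
     // nothing computed on the else path
     y = a + b;     // partially redundant

   Insertion adds 'a + b' on the else edge and a PHI merging the two
   occurrences, after which elimination can turn y's computation into
   a use of the PHI result.  */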
3803 static void
3804 insert (void)
3806 basic_block bb;
3808 FOR_ALL_BB_FN (bb, cfun)
3809 NEW_SETS (bb) = bitmap_set_new ();
3811 int *rpo = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
3812 int *bb_rpo = XNEWVEC (int, last_basic_block_for_fn (cfun) + 1);
3813 int rpo_num = pre_and_rev_post_order_compute (NULL, rpo, false);
3814 for (int i = 0; i < rpo_num; ++i)
3815 bb_rpo[rpo[i]] = i;
3817 int num_iterations = 0;
3818 bool changed;
3821 num_iterations++;
3822 if (dump_file && dump_flags & TDF_DETAILS)
3823 fprintf (dump_file, "Starting insert iteration %d\n", num_iterations);
3825 changed = false;
3826 for (int idx = 0; idx < rpo_num; ++idx)
3828 basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[idx]);
3829 basic_block dom = get_immediate_dominator (CDI_DOMINATORS, block);
3830 if (dom)
3832 unsigned i;
3833 bitmap_iterator bi;
3834 bitmap_set_t newset;
3836 /* First, update the AVAIL_OUT set with anything we may have
3837 inserted higher up in the dominator tree. */
3838 newset = NEW_SETS (dom);
3840 /* Note that we need to value_replace in both NEW_SETS and
3841 AVAIL_OUT. In both sets the value may currently be
3842 represented by some non-simple expression that we want
3843 to replace with the new one. */
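/* (Hypothetical example: if AVAIL_OUT (block) represents a value only
   by the NARY expression 'a_1 + b_2' and insertion in DOM created an
   SSA name t_3 carrying the same value, the replacement below makes
   t_3 the leader so later lookups find a simple name.)  */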
3844 bool avail_out_changed = false;
3845 FOR_EACH_EXPR_ID_IN_SET (newset, i, bi)
3847 pre_expr expr = expression_for_id (i);
3848 bitmap_value_replace_in_set (NEW_SETS (block), expr);
3849 avail_out_changed
3850 |= bitmap_value_replace_in_set (AVAIL_OUT (block), expr);
3852 /* We need to iterate if AVAIL_OUT changed for a block that has
3853 an already processed successor. */
3854 if (avail_out_changed && !changed)
3856 edge_iterator ei;
3857 edge e;
3858 FOR_EACH_EDGE (e, ei, block->succs)
3859 if (e->dest->index != EXIT_BLOCK
3860 && bb_rpo[e->dest->index] < idx)
3861 changed = true;
3864 /* Insert expressions for partial redundancies. */
3865 if (flag_tree_pre && !single_pred_p (block))
3867 vec<pre_expr> exprs
3868 = sorted_array_from_bitmap_set (ANTIC_IN (block));
3869 /* Sorting is not perfect, iterate locally. */
3870 while (do_pre_regular_insertion (block, dom, exprs))
3872 exprs.release ();
3873 if (do_partial_partial)
3875 exprs = sorted_array_from_bitmap_set (PA_IN (block));
3876 while (do_pre_partial_partial_insertion (block, dom,
3877 exprs))
3879 exprs.release ();
3885 /* Clear the NEW sets before the next iteration. We have already
3886 fully propagated their contents. */
3887 if (changed)
3888 FOR_ALL_BB_FN (bb, cfun)
3889 bitmap_set_free (NEW_SETS (bb));
3891 while (changed);
3893 statistics_histogram_event (cfun, "insert iterations", num_iterations);
3895 /* AVAIL_OUT is not needed after insertion so we don't have to
3896 propagate NEW_SETS from hoist insertion. */
3897 FOR_ALL_BB_FN (bb, cfun)
3899 bitmap_set_free (NEW_SETS (bb));
3900 bitmap_set_pool.remove (NEW_SETS (bb));
3901 NEW_SETS (bb) = NULL;
3904 /* Insert expressions for hoisting. Do a backward walk here since
3905 inserting into BLOCK exposes new opportunities in its predecessors.
3906 Since PRE and hoist insertions can trigger each other back-to-back,
3907 and we are interested in hoisting opportunities exposed by PRE insertion
3908 but not in PRE opportunities exposed by hoisting, do hoist insertion only
3909 after the PRE insertion iteration has finished, and do not iterate it. */
3910 if (flag_code_hoisting)
3911 for (int idx = rpo_num - 1; idx >= 0; --idx)
3913 basic_block block = BASIC_BLOCK_FOR_FN (cfun, rpo[idx]);
3914 if (EDGE_COUNT (block->succs) >= 2)
3915 changed |= do_hoist_insertion (block);
3918 free (rpo);
3919 free (bb_rpo);
3923 /* Compute the AVAIL set for all basic blocks.
3925 This function performs value numbering of the statements in each basic
3926 block. The AVAIL sets are built from information we glean while doing
3927 this value numbering, since the AVAIL sets contain only one entry per
3928 value.
3930 AVAIL_IN[BLOCK] = AVAIL_OUT[dom(BLOCK)].
3931 AVAIL_OUT[BLOCK] = AVAIL_IN[BLOCK] U PHI_GEN[BLOCK] U TMP_GEN[BLOCK]. */
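/* A small worked sketch of the equations above: if dom (BLOCK) computes
   t_1 = a_2 + b_3 and BLOCK starts with x_4 = PHI <t_1, c_5>, then
   AVAIL_OUT (BLOCK) is AVAIL_OUT (dom (BLOCK)) plus x_4 from PHI_GEN
   plus any names TMP_GEN collects from BLOCK's own statements, with
   each value represented by exactly one leader expression.  */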
3933 static void
3934 compute_avail (function *fun)
3937 basic_block block, son;
3938 basic_block *worklist;
3939 size_t sp = 0;
3940 unsigned i;
3941 tree name;
3943 /* We pretend that default definitions are defined in the entry block.
3944 This includes function arguments and the static chain decl. */
3945 FOR_EACH_SSA_NAME (i, name, fun)
3947 pre_expr e;
3948 if (!SSA_NAME_IS_DEFAULT_DEF (name)
3949 || has_zero_uses (name)
3950 || virtual_operand_p (name))
3951 continue;
3953 e = get_or_alloc_expr_for_name (name);
3954 add_to_value (get_expr_value_id (e), e);
3955 bitmap_insert_into_set (TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (fun)), e);
3956 bitmap_value_insert_into_set (AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (fun)),
3960 if (dump_file && (dump_flags & TDF_DETAILS))
3962 print_bitmap_set (dump_file, TMP_GEN (ENTRY_BLOCK_PTR_FOR_FN (fun)),
3963 "tmp_gen", ENTRY_BLOCK);
3964 print_bitmap_set (dump_file, AVAIL_OUT (ENTRY_BLOCK_PTR_FOR_FN (fun)),
3965 "avail_out", ENTRY_BLOCK);
3968 /* Allocate the worklist. */
3969 worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
3971 /* Seed the algorithm by putting the dominator children of the entry
3972 block on the worklist. */
3973 for (son = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (fun));
3974 son;
3975 son = next_dom_son (CDI_DOMINATORS, son))
3976 worklist[sp++] = son;
3978 BB_LIVE_VOP_ON_EXIT (ENTRY_BLOCK_PTR_FOR_FN (fun))
3979 = ssa_default_def (fun, gimple_vop (fun));
3981 /* Loop until the worklist is empty. */
3982 while (sp)
3984 gimple *stmt;
3985 basic_block dom;
3987 /* Pick a block from the worklist. */
3988 block = worklist[--sp];
3989 vn_context_bb = block;
3991 /* Initially, the set of available values in BLOCK is that of
3992 its immediate dominator. */
3993 dom = get_immediate_dominator (CDI_DOMINATORS, block);
3994 if (dom)
3996 bitmap_set_copy (AVAIL_OUT (block), AVAIL_OUT (dom));
3997 BB_LIVE_VOP_ON_EXIT (block) = BB_LIVE_VOP_ON_EXIT (dom);
4000 /* Generate values for PHI nodes. */
4001 for (gphi_iterator gsi = gsi_start_phis (block); !gsi_end_p (gsi);
4002 gsi_next (&gsi))
4004 tree result = gimple_phi_result (gsi.phi ());
4006 /* We have no need for virtual phis, as they don't represent
4007 actual computations. */
4008 if (virtual_operand_p (result))
4010 BB_LIVE_VOP_ON_EXIT (block) = result;
4011 continue;
4014 pre_expr e = get_or_alloc_expr_for_name (result);
4015 add_to_value (get_expr_value_id (e), e);
4016 bitmap_value_insert_into_set (AVAIL_OUT (block), e);
4017 bitmap_insert_into_set (PHI_GEN (block), e);
4020 BB_MAY_NOTRETURN (block) = 0;
4022 /* Now compute value numbers and populate value sets with all
4023 the expressions computed in BLOCK. */
4024 bool set_bb_may_notreturn = false;
4025 for (gimple_stmt_iterator gsi = gsi_start_bb (block); !gsi_end_p (gsi);
4026 gsi_next (&gsi))
4028 ssa_op_iter iter;
4029 tree op;
4031 stmt = gsi_stmt (gsi);
4033 if (set_bb_may_notreturn)
4035 BB_MAY_NOTRETURN (block) = 1;
4036 set_bb_may_notreturn = false;
4039 /* Cache whether the basic-block has any non-visible side-effect
4040 or control flow.
4041 If this isn't a call or it is the last stmt in the
4042 basic-block then the CFG represents things correctly. */
4043 if (is_gimple_call (stmt) && !stmt_ends_bb_p (stmt))
4045 /* Non-looping const functions always return normally.
4046 Otherwise the call might not return or might have side-effects
4047 that forbid hoisting possibly trapping expressions
4048 before it. */
4049 int flags = gimple_call_flags (stmt);
4050 if (!(flags & (ECF_CONST|ECF_PURE))
4051 || (flags & ECF_LOOPING_CONST_OR_PURE)
4052 || stmt_can_throw_external (fun, stmt))
4053 /* Defer setting of BB_MAY_NOTRETURN to avoid it
4054 influencing the processing of the call itself. */
4055 set_bb_may_notreturn = true;
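/* (Sketch of why the flag matters: a trapping reference occurring
   after such a call is not really anticipated at the block start;
   were it added to EXP_GEN regardless, PRE or hoisting could insert
   it above the call and trap on executions where the call never
   returns.)  */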
4058 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
4060 pre_expr e = get_or_alloc_expr_for_name (op);
4061 add_to_value (get_expr_value_id (e), e);
4062 bitmap_insert_into_set (TMP_GEN (block), e);
4063 bitmap_value_insert_into_set (AVAIL_OUT (block), e);
4066 if (gimple_vdef (stmt))
4067 BB_LIVE_VOP_ON_EXIT (block) = gimple_vdef (stmt);
4069 if (gimple_has_side_effects (stmt)
4070 || stmt_could_throw_p (fun, stmt)
4071 || is_gimple_debug (stmt))
4072 continue;
4074 FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
4076 if (ssa_undefined_value_p (op))
4077 continue;
4078 pre_expr e = get_or_alloc_expr_for_name (op);
4079 bitmap_value_insert_into_set (EXP_GEN (block), e);
4082 switch (gimple_code (stmt))
4084 case GIMPLE_RETURN:
4085 continue;
4087 case GIMPLE_CALL:
4089 vn_reference_t ref;
4090 vn_reference_s ref1;
4091 pre_expr result = NULL;
4093 vn_reference_lookup_call (as_a <gcall *> (stmt), &ref, &ref1);
4094 /* There is no point in applying PRE to a call without a value. */
4095 if (!ref || !ref->result)
4096 continue;
4098 /* If the value of the call is not invalidated in
4099 this block until it is computed, add the expression
4100 to EXP_GEN. */
4101 if ((!gimple_vuse (stmt)
4102 || gimple_code
4103 (SSA_NAME_DEF_STMT (gimple_vuse (stmt))) == GIMPLE_PHI
4104 || gimple_bb (SSA_NAME_DEF_STMT
4105 (gimple_vuse (stmt))) != block)
4106 /* If the REFERENCE traps and there was a preceding
4107 point in the block that might not return, avoid
4108 adding the reference to EXP_GEN. */
4109 && (!BB_MAY_NOTRETURN (block)
4110 || !vn_reference_may_trap (ref)))
4112 result = get_or_alloc_expr_for_reference
4113 (ref, gimple_location (stmt));
4114 add_to_value (get_expr_value_id (result), result);
4115 bitmap_value_insert_into_set (EXP_GEN (block), result);
4117 continue;
4120 case GIMPLE_ASSIGN:
4122 pre_expr result = NULL;
4123 switch (vn_get_stmt_kind (stmt))
4125 case VN_NARY:
4127 enum tree_code code = gimple_assign_rhs_code (stmt);
4128 vn_nary_op_t nary;
4130 /* COND_EXPR is awkward in that it contains an
4131 embedded complex expression.
4132 Don't even try to shove it through PRE. */
4133 if (code == COND_EXPR)
4134 continue;
4136 vn_nary_op_lookup_stmt (stmt, &nary);
4137 if (!nary || nary->predicated_values)
4138 continue;
4140 unsigned value_id = nary->value_id;
4141 if (value_id_constant_p (value_id))
4142 continue;
4144 /* Record the un-valueized expression for EXP_GEN. */
4145 nary = XALLOCAVAR (struct vn_nary_op_s,
4146 sizeof_vn_nary_op
4147 (vn_nary_length_from_stmt (stmt)));
4148 init_vn_nary_op_from_stmt (nary, as_a <gassign *> (stmt));
4150 /* If the NARY traps and there was a preceding
4151 point in the block that might not return, avoid
4152 adding the nary to EXP_GEN. */
4153 if (BB_MAY_NOTRETURN (block)
4154 && vn_nary_may_trap (nary))
4155 continue;
4157 result = get_or_alloc_expr_for_nary
4158 (nary, value_id, gimple_location (stmt));
4159 break;
4162 case VN_REFERENCE:
4164 tree rhs1 = gimple_assign_rhs1 (stmt);
4165 ao_ref rhs1_ref;
4166 ao_ref_init (&rhs1_ref, rhs1);
4167 alias_set_type set = ao_ref_alias_set (&rhs1_ref);
4168 alias_set_type base_set
4169 = ao_ref_base_alias_set (&rhs1_ref);
4170 vec<vn_reference_op_s> operands
4171 = vn_reference_operands_for_lookup (rhs1);
4172 vn_reference_t ref;
4173 vn_reference_lookup_pieces (gimple_vuse (stmt), set,
4174 base_set, TREE_TYPE (rhs1),
4175 operands, &ref, VN_WALK);
4176 if (!ref)
4178 operands.release ();
4179 continue;
4182 /* If the REFERENCE traps and there was a preceding
4183 point in the block that might not return, avoid
4184 adding the reference to EXP_GEN. */
4185 if (BB_MAY_NOTRETURN (block)
4186 && vn_reference_may_trap (ref))
4188 operands.release ();
4189 continue;
4192 /* If the value of the reference is not invalidated in
4193 this block until it is computed, add the expression
4194 to EXP_GEN. */
4195 if (gimple_vuse (stmt))
4197 gimple *def_stmt;
4198 bool ok = true;
4199 def_stmt = SSA_NAME_DEF_STMT (gimple_vuse (stmt));
4200 while (!gimple_nop_p (def_stmt)
4201 && gimple_code (def_stmt) != GIMPLE_PHI
4202 && gimple_bb (def_stmt) == block)
4204 if (stmt_may_clobber_ref_p
4205 (def_stmt, gimple_assign_rhs1 (stmt)))
4207 ok = false;
4208 break;
4210 def_stmt
4211 = SSA_NAME_DEF_STMT (gimple_vuse (def_stmt));
4213 if (!ok)
4215 operands.release ();
4216 continue;
4220 /* If the load was value-numbered to another
4221 load, make sure we do not use its expression
4222 for insertion if it wouldn't be a valid
4223 replacement. */
4224 /* At the moment we have a testcase
4225 for hoist insertion of aligned vs. misaligned
4226 variants in gcc.dg/torture/pr65270-1.c, thus
4227 with just alignment to be considered we can
4228 simply replace the expression in the hashtable
4229 with the most conservative one. */
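/* (Illustration under that premise: if the hashtable variant assumes
   the natural alignment of its type while the current access only
   guarantees, say, a packed field's alignment, the code below weakens
   the hashtable type via build_aligned_type to the lesser alignment,
   so an inserted expression never assumes more than both accesses
   guarantee.)  */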
4230 vn_reference_op_t ref1 = &ref->operands.last ();
4231 while (ref1->opcode != TARGET_MEM_REF
4232 && ref1->opcode != MEM_REF
4233 && ref1 != &ref->operands[0])
4234 --ref1;
4235 vn_reference_op_t ref2 = &operands.last ();
4236 while (ref2->opcode != TARGET_MEM_REF
4237 && ref2->opcode != MEM_REF
4238 && ref2 != &operands[0])
4239 --ref2;
4240 if ((ref1->opcode == TARGET_MEM_REF
4241 || ref1->opcode == MEM_REF)
4242 && (TYPE_ALIGN (ref1->type)
4243 > TYPE_ALIGN (ref2->type)))
4244 ref1->type
4245 = build_aligned_type (ref1->type,
4246 TYPE_ALIGN (ref2->type));
4247 /* TBAA behavior is an observable part of the expression's
4248 semantics, so make sure the hashtable entry covers it as well
4249 by adjusting the ref's alias set and its base set. */
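/* (A summary of the cases below: if the hashtable ref's alias sets
   already subsume the current access, nothing needs changing; with
   mismatching base opcodes or bases other than MEM_REF/TARGET_MEM_REF
   no easy adjustment exists and we punt; if the current access's sets
   subsume the hashtable ones we retarget the hashtable ref to the
   current alias-pointer type; otherwise we fall back to alias set
   zero, the most conservative choice.)  */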
4250 if ((ref->set == set
4251 || alias_set_subset_of (set, ref->set))
4252 && (ref->base_set == base_set
4253 || alias_set_subset_of (base_set, ref->base_set)))
4255 else if (ref1->opcode != ref2->opcode
4256 || (ref1->opcode != MEM_REF
4257 && ref1->opcode != TARGET_MEM_REF))
4259 /* With mismatching base opcodes or bases
4260 other than MEM_REF or TARGET_MEM_REF we
4261 can't do any easy TBAA adjustment. */
4262 operands.release ();
4263 continue;
4265 else if (ref->set == set
4266 || alias_set_subset_of (ref->set, set))
4268 tree reft = reference_alias_ptr_type (rhs1);
4269 ref->set = set;
4270 ref->base_set = set;
4271 if (ref1->opcode == MEM_REF)
4272 ref1->op0
4273 = wide_int_to_tree (reft,
4274 wi::to_wide (ref1->op0));
4275 else
4276 ref1->op2
4277 = wide_int_to_tree (reft,
4278 wi::to_wide (ref1->op2));
4280 else
4282 ref->set = 0;
4283 ref->base_set = 0;
4284 if (ref1->opcode == MEM_REF)
4285 ref1->op0
4286 = wide_int_to_tree (ptr_type_node,
4287 wi::to_wide (ref1->op0));
4288 else
4289 ref1->op2
4290 = wide_int_to_tree (ptr_type_node,
4291 wi::to_wide (ref1->op2));
4293 /* We also need to make sure that the access path
4294 ends in an access of the same size, as otherwise
4295 we might assume an access may not trap while in
4296 fact it might. That's independent of whether
4297 TBAA is in effect. */
4298 if (TYPE_SIZE (ref1->type) != TYPE_SIZE (ref2->type)
4299 && (! TYPE_SIZE (ref1->type)
4300 || ! TYPE_SIZE (ref2->type)
4301 || ! operand_equal_p (TYPE_SIZE (ref1->type),
4302 TYPE_SIZE (ref2->type))))
4304 operands.release ();
4305 continue;
4307 operands.release ();
4309 result = get_or_alloc_expr_for_reference
4310 (ref, gimple_location (stmt));
4311 break;
4314 default:
4315 continue;
4318 add_to_value (get_expr_value_id (result), result);
4319 bitmap_value_insert_into_set (EXP_GEN (block), result);
4320 continue;
4322 default:
4323 break;
4326 if (set_bb_may_notreturn)
4328 BB_MAY_NOTRETURN (block) = 1;
4329 set_bb_may_notreturn = false;
4332 if (dump_file && (dump_flags & TDF_DETAILS))
4334 print_bitmap_set (dump_file, EXP_GEN (block),
4335 "exp_gen", block->index);
4336 print_bitmap_set (dump_file, PHI_GEN (block),
4337 "phi_gen", block->index);
4338 print_bitmap_set (dump_file, TMP_GEN (block),
4339 "tmp_gen", block->index);
4340 print_bitmap_set (dump_file, AVAIL_OUT (block),
4341 "avail_out", block->index);
4344 /* Put the dominator children of BLOCK on the worklist of blocks
4345 to compute available sets for. */
4346 for (son = first_dom_son (CDI_DOMINATORS, block);
4347 son;
4348 son = next_dom_son (CDI_DOMINATORS, son))
4349 worklist[sp++] = son;
4351 vn_context_bb = NULL;
4353 free (worklist);
4357 /* Initialize data structures used by PRE. */
4359 static void
4360 init_pre (void)
4362 basic_block bb;
4364 next_expression_id = 1;
4365 expressions.create (0);
4366 expressions.safe_push (NULL);
4367 value_expressions.create (get_max_value_id () + 1);
4368 value_expressions.quick_grow_cleared (get_max_value_id () + 1);
4369 constant_value_expressions.create (get_max_constant_value_id () + 1);
4370 constant_value_expressions.quick_grow_cleared (get_max_constant_value_id () + 1);
4371 name_to_id.create (0);
4372 gcc_obstack_init (&pre_expr_obstack);
4374 inserted_exprs = BITMAP_ALLOC (NULL);
4376 connect_infinite_loops_to_exit ();
4377 memset (&pre_stats, 0, sizeof (pre_stats));
4379 alloc_aux_for_blocks (sizeof (struct bb_bitmap_sets));
4381 calculate_dominance_info (CDI_DOMINATORS);
4383 bitmap_obstack_initialize (&grand_bitmap_obstack);
4384 expression_to_id = new hash_table<pre_expr_d> (num_ssa_names * 3);
4385 FOR_ALL_BB_FN (bb, cfun)
4387 EXP_GEN (bb) = bitmap_set_new ();
4388 PHI_GEN (bb) = bitmap_set_new ();
4389 TMP_GEN (bb) = bitmap_set_new ();
4390 AVAIL_OUT (bb) = bitmap_set_new ();
4391 PHI_TRANS_TABLE (bb) = NULL;
4396 /* Deallocate data structures used by PRE. */
4398 static void
4399 fini_pre ()
4401 value_expressions.release ();
4402 constant_value_expressions.release ();
4403 expressions.release ();
4404 bitmap_obstack_release (&grand_bitmap_obstack);
4405 bitmap_set_pool.release ();
4406 pre_expr_pool.release ();
4407 delete expression_to_id;
4408 expression_to_id = NULL;
4409 name_to_id.release ();
4410 obstack_free (&pre_expr_obstack, NULL);
4412 basic_block bb;
4413 FOR_ALL_BB_FN (bb, cfun)
4414 if (bb->aux && PHI_TRANS_TABLE (bb))
4415 delete PHI_TRANS_TABLE (bb);
4416 free_aux_for_blocks ();
4419 namespace {
4421 const pass_data pass_data_pre =
4423 GIMPLE_PASS, /* type */
4424 "pre", /* name */
4425 OPTGROUP_NONE, /* optinfo_flags */
4426 TV_TREE_PRE, /* tv_id */
4427 ( PROP_cfg | PROP_ssa ), /* properties_required */
4428 0, /* properties_provided */
4429 0, /* properties_destroyed */
4430 TODO_rebuild_alias, /* todo_flags_start */
4431 0, /* todo_flags_finish */
4434 class pass_pre : public gimple_opt_pass
4436 public:
4437 pass_pre (gcc::context *ctxt)
4438 : gimple_opt_pass (pass_data_pre, ctxt)
4441 /* opt_pass methods: */
4442 bool gate (function *) final override
4443 { return flag_tree_pre != 0 || flag_code_hoisting != 0; }
4444 unsigned int execute (function *) final override;
4446 }; // class pass_pre
4448 /* Valueization hook for RPO VN when we are calling back to it
4449 at ANTIC compute time. */
4451 static tree
4452 pre_valueize (tree name)
4454 if (TREE_CODE (name) == SSA_NAME)
4456 tree tem = VN_INFO (name)->valnum;
4457 if (tem != VN_TOP && tem != name)
4459 if (TREE_CODE (tem) != SSA_NAME
4460 || SSA_NAME_IS_DEFAULT_DEF (tem))
4461 return tem;
4462 /* We create temporary SSA names for representatives that
4463 do not have a definition (yet) but are not default defs either;
4464 assume they are fine to use. */
4465 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (tem));
4466 if (! def_bb
4467 || dominated_by_p (CDI_DOMINATORS, vn_context_bb, def_bb))
4468 return tem;
4469 /* ??? Now we could look for a leader. Ideally we'd somehow
4470 expose RPO VN leaders and get rid of AVAIL_OUT as well... */
4473 return name;
4476 unsigned int
4477 pass_pre::execute (function *fun)
4479 unsigned int todo = 0;
4481 do_partial_partial =
4482 flag_tree_partial_pre && optimize_function_for_speed_p (fun);
4484 /* This has to happen before VN runs because
4485 loop_optimizer_init may create new phis, etc. */
4486 loop_optimizer_init (LOOPS_NORMAL);
4487 split_edges_for_insertion ();
4488 scev_initialize ();
4489 calculate_dominance_info (CDI_DOMINATORS);
4491 run_rpo_vn (VN_WALK);
4493 init_pre ();
4495 vn_valueize = pre_valueize;
4497 /* Insert can get quite slow on an incredibly large number of basic
4498 blocks due to some quadratic behavior. Until this behavior is
4499 fixed, don't run it when we have an incredibly large number of
4500 bb's. If we aren't going to run insert, there is no point in
4501 computing ANTIC, either, even though it's plenty fast, nor do
4502 we require AVAIL. */
4503 if (n_basic_blocks_for_fn (fun) < 4000)
4505 compute_avail (fun);
4506 compute_antic ();
4507 insert ();
4510 /* Make sure to remove fake edges before committing our inserts.
4511 This makes sure we don't end up with extra critical edges that
4512 we would need to split. */
4513 remove_fake_exit_edges ();
4514 gsi_commit_edge_inserts ();
4516 /* Elimination folds statements, which might (though it should not...)
4517 end up not keeping virtual operands up-to-date. */
4518 gcc_assert (!need_ssa_update_p (fun));
4520 statistics_counter_event (fun, "Insertions", pre_stats.insertions);
4521 statistics_counter_event (fun, "PA inserted", pre_stats.pa_insert);
4522 statistics_counter_event (fun, "HOIST inserted", pre_stats.hoist_insert);
4523 statistics_counter_event (fun, "New PHIs", pre_stats.phis);
4525 todo |= eliminate_with_rpo_vn (inserted_exprs);
4527 vn_valueize = NULL;
4529 fini_pre ();
4531 scev_finalize ();
4532 loop_optimizer_finalize ();
4534 /* Perform a CFG cleanup before we run simple_dce_from_worklist since
4535 unreachable code regions will not have up-to-date SSA form, which
4536 confuses it. */
4537 bool need_crit_edge_split = false;
4538 if (todo & TODO_cleanup_cfg)
4540 cleanup_tree_cfg ();
4541 need_crit_edge_split = true;
4544 /* Because we don't follow exactly the standard PRE algorithm, and decide not
4545 to insert PHI nodes sometimes, and because value numbering of casts isn't
4546 perfect, we sometimes end up inserting dead code. This simple DCE-like
4547 pass removes any insertions we made that weren't actually used. */
4548 simple_dce_from_worklist (inserted_exprs);
4549 BITMAP_FREE (inserted_exprs);
4551 /* TODO: tail_merge_optimize may merge all predecessors of a block, in which
4552 case we can merge the block with the remaining predecessor of the block.
4553 It should either:
4554 - call merge_blocks after each tail merge iteration
4555 - call merge_blocks after all tail merge iterations
4556 - mark TODO_cleanup_cfg when necessary. */
4557 todo |= tail_merge_optimize (need_crit_edge_split);
4559 free_rpo_vn ();
4561 /* Tail merging invalidates the virtual SSA web; together with
4562 cfg-cleanup opportunities exposed by PRE this will wreck the
4563 SSA updating machinery. So make sure to run update-ssa
4564 manually, before eventually scheduling cfg-cleanup as part of
4565 the todo. */
4566 update_ssa (TODO_update_ssa_only_virtuals);
4568 return todo;
4571 } // anon namespace
4573 gimple_opt_pass *
4574 make_pass_pre (gcc::context *ctxt)
4576 return new pass_pre (ctxt);