gcc/tree-ssa-loop-ivopts.cc
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and enable us to decide
66 costs more precisely, but getting all the interactions right would
67 be complicated.
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will likely be transformed by the RTL doloop optimization,
71 trying to make the selected IV candidate set optimal. The process of
72 doloop support includes:
74 1) Analyze whether the current loop will be transformed into a doloop;
75 if so, find and mark its compare type IV use as a doloop use (iv_group
76 field doloop_p), and set flag doloop_use_p of ivopts_data to notify
77 subsequent processing. See analyze_and_mark_doloop_use and its callees
78 for the details. The target hook predict_doloop_p does target specific checks.
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 with flag doloop_p of the iv_cand set; its step cost is set to zero and,
82 like a biv, it gets no extra cost. For cost determination between the doloop
83 IV cand and an IV use, the target hooks doloop_cost_for_generic and
84 doloop_cost_for_address are provided to add extra costs for generic type
85 and address type IV uses. Zero cost is assigned to the pair of the doloop IV
86 cand and the doloop IV use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm proceeds as usual, picking the doloop dedicated IV
90 if profitable. */
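/* A minimal illustrative sketch (hypothetical loop, not taken from the code
   below): for

     for (i = 0; i < n; i++)
       sum += a[i];

   the address computation for a[i] and the comparison i < n are the
   interesting uses; the candidates include the original biv i as well as a
   pointer iv p with base &a[0] and step sizeof (*a).  If the pointer
   candidate is selected, the loop is in effect rewritten as

     for (p = &a[0]; p < &a[0] + n; p++)
       sum += *p;

   i.e. the address computation is strength-reduced and the counter i is
   eliminated by expressing the exit test in terms of p.  */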
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
136 /* For lang_hooks.types.type_for_mode. */
137 #include "langhooks.h"
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
143 /* The infinite cost. */
144 #define INFTY 1000000000
146 /* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
156 niter = likely_max_stmt_executions_int (loop);
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
162 return niter;
165 struct iv_use;
167 /* Representation of the induction variable. */
168 struct iv
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* A memory object to which the induction variable points. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 struct iv_use *nonlin_use; /* The nonlinear use recorded for this iv, if any. */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use;/* For biv, indicate if it's used in any address
178 type use. */
181 /* Per-ssa version information (induction variable descriptions, etc.). */
182 struct version_info
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
192 /* Types of uses. */
193 enum use_type
195 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
196 USE_REF_ADDRESS, /* Use is an address for an explicit memory
197 reference. */
198 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
199 cases where the expansion of the function
200 will turn the argument into a normal address. */
201 USE_COMPARE /* Use is a compare. */
204 /* Cost of a computation. */
205 class comp_cost
207 public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
227 /* Subtracts constant C from this comp_cost. */
228 comp_cost operator-= (HOST_WIDE_INT c);
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
236 /* Subtracts cost COST2 from COST1. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
248 /* Returns true if COST1 is smaller than or equal to COST2. */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 bool
263 comp_cost::infinite_cost_p ()
265 return cost == INFTY;
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
278 return cost1;
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
293 return cost1;
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
299 *this = *this + cost;
300 return *this;
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
306 if (c >= INFTY)
307 this->cost = INFTY;
309 if (infinite_cost_p ())
310 return *this;
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
315 return *this;
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
321 if (infinite_cost_p ())
322 return *this;
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
327 return *this;
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
337 this->cost /= c;
339 return *this;
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
345 if (infinite_cost_p ())
346 return *this;
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
351 return *this;
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
357 *this = *this - cost;
358 return *this;
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
367 return cost1.cost < cost2.cost;
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
380 return cost1 < cost2 || cost1 == cost2;
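/* Note that the comparisons above order costs primarily by the runtime cost
   field and use complexity only as a tie-breaker; for instance
   comp_cost (4, 2) < comp_cost (5, 0) and comp_cost (4, 1) < comp_cost (4, 2).  */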
383 struct iv_inv_expr_ent;
385 /* The candidate - cost pair. */
386 class cost_pair
388 public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
401 /* Use. */
402 struct iv_use
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64 addr_offset;
416 /* Const offset stripped from base address. */
419 /* Group of uses. */
420 struct iv_group
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
431 /* The costs w.r.t. the iv candidates. */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
440 /* The position where the iv is computed. */
441 enum iv_position
443 IP_NORMAL, /* At the end, just before the exit condition. */
444 IP_END, /* At the end of the latch block. */
445 IP_BEFORE_USE, /* Immediately before a specific use. */
446 IP_AFTER_USE, /* Immediately after a specific use. */
447 IP_ORIGINAL /* The original biv. */
450 /* The induction variable candidate. */
451 struct iv_cand
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 bool involves_undefs; /* Whether the IV involves undefined values. */
457 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
458 gimple *incremented_at;/* For original biv, the statement where it is
459 incremented. */
460 tree var_before; /* The variable used for it before increment. */
461 tree var_after; /* The variable used for it after increment. */
462 struct iv *iv; /* The value of the candidate. NULL for
463 "pseudocandidate" used to indicate the possibility
464 to replace the final value of an iv by direct
465 computation of the value. */
466 unsigned cost; /* Cost of the candidate. */
467 unsigned cost_step; /* Cost of the candidate's increment operation. */
468 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 where it is incremented. */
470 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
471 iv_cand. */
472 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
473 handle it as a new invariant expression which will
474 be hoisted out of loop. */
475 struct iv *orig_iv; /* The original iv if this cand is added from biv with
476 smaller type. */
477 bool doloop_p; /* Whether this is a doloop candidate. */
480 /* Hashtable entry for common candidate derived from iv uses. */
481 class iv_common_cand
483 public:
484 tree base;
485 tree step;
486 /* IV uses from which this common candidate is derived. */
487 auto_vec<struct iv_use *> uses;
488 hashval_t hash;
491 /* Hashtable helpers. */
493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 static inline hashval_t hash (const iv_common_cand *);
496 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
499 /* Hash function for possible common candidates. */
501 inline hashval_t
502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 return ccand->hash;
507 /* Hash table equality function for common candidates. */
509 inline bool
510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 const iv_common_cand *ccand2)
513 return (ccand1->hash == ccand2->hash
514 && operand_equal_p (ccand1->base, ccand2->base, 0)
515 && operand_equal_p (ccand1->step, ccand2->step, 0)
516 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
520 /* Loop invariant expression hashtable entry. */
522 struct iv_inv_expr_ent
524 /* Tree expression of the entry. */
525 tree expr;
526 /* Unique identifier. */
527 int id;
528 /* Hash value. */
529 hashval_t hash;
532 /* Sort iv_inv_expr_ent pair A and B by id field. */
534 static int
535 sort_iv_inv_expr_ent (const void *a, const void *b)
537 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 unsigned id1 = (*e1)->id;
541 unsigned id2 = (*e2)->id;
543 if (id1 < id2)
544 return -1;
545 else if (id1 > id2)
546 return 1;
547 else
548 return 0;
551 /* Hashtable helpers. */
553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 static inline hashval_t hash (const iv_inv_expr_ent *);
556 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
559 /* Return true if uses of type TYPE represent some form of address. */
561 inline bool
562 address_p (use_type type)
564 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
567 /* Hash function for loop invariant expressions. */
569 inline hashval_t
570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 return expr->hash;
575 /* Hash table equality function for expressions. */
577 inline bool
578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 const iv_inv_expr_ent *expr2)
581 return expr1->hash == expr2->hash
582 && operand_equal_p (expr1->expr, expr2->expr, 0);
585 struct ivopts_data
587 /* The currently optimized loop. */
588 class loop *current_loop;
589 location_t loop_loc;
591 /* Numbers of iterations for all exits of the current loop. */
592 hash_map<edge, tree_niter_desc *> *niters;
594 /* Number of registers used in it. */
595 unsigned regs_used;
597 /* The size of version_info array allocated. */
598 unsigned version_info_size;
600 /* The array of information for the ssa names. */
601 struct version_info *version_info;
603 /* The hashtable of loop invariant expressions created
604 by ivopt. */
605 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 /* The bitmap of indices in version_info whose value was changed. */
608 bitmap relevant;
610 /* The uses of induction variables. */
611 vec<iv_group *> vgroups;
613 /* The candidates. */
614 vec<iv_cand *> vcands;
616 /* A bitmap of important candidates. */
617 bitmap important_candidates;
619 /* Cache used by tree_to_aff_combination_expand. */
620 hash_map<tree, name_expansion *> *name_expansion_cache;
622 /* The hashtable of common candidates derived from iv uses. */
623 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 /* The common candidates. */
626 vec<iv_common_cand *> iv_common_cands;
628 /* Hash map recording base object information of tree exp. */
629 hash_map<tree, tree> *base_object_map;
631 /* The maximum invariant variable id. */
632 unsigned max_inv_var_id;
634 /* The maximum invariant expression id. */
635 unsigned max_inv_expr_id;
637 /* Number of no_overflow BIVs which are not used in memory address. */
638 unsigned bivs_not_used_in_addr;
640 /* Obstack for iv structure. */
641 struct obstack iv_obstack;
643 /* Whether to consider just related and important candidates when replacing a
644 use. */
645 bool consider_all_candidates;
647 /* Are we optimizing for speed? */
648 bool speed;
650 /* Whether the loop body includes any function calls. */
651 bool body_includes_call;
653 /* Whether the loop body can only be exited via single exit. */
654 bool loop_single_exit_p;
656 /* Whether the loop has doloop comparison use. */
657 bool doloop_use_p;
660 /* An assignment of iv candidates to uses. */
662 class iv_ca
664 public:
665 /* The number of uses covered by the assignment. */
666 unsigned upto;
668 /* Number of uses that cannot be expressed by the candidates in the set. */
669 unsigned bad_groups;
671 /* Candidate assigned to a use, together with the related costs. */
672 class cost_pair **cand_for_group;
674 /* Number of times each candidate is used. */
675 unsigned *n_cand_uses;
677 /* The candidates used. */
678 bitmap cands;
680 /* The number of candidates in the set. */
681 unsigned n_cands;
683 /* The number of invariants needed, including both invariant variables and
684 invariant expressions. */
685 unsigned n_invs;
687 /* Total cost of expressing uses. */
688 comp_cost cand_use_cost;
690 /* Total cost of candidates. */
691 int64_t cand_cost;
693 /* Number of times each invariant variable is used. */
694 unsigned *n_inv_var_uses;
696 /* Number of times each invariant expression is used. */
697 unsigned *n_inv_expr_uses;
699 /* Total cost of the assignment. */
700 comp_cost cost;
703 /* Difference of two iv candidate assignments. */
705 struct iv_ca_delta
707 /* Changed group. */
708 struct iv_group *group;
710 /* An old assignment (for rollback purposes). */
711 class cost_pair *old_cp;
713 /* A new assignment. */
714 class cost_pair *new_cp;
716 /* Next change in the list. */
717 struct iv_ca_delta *next;
720 /* Bound on number of candidates below which all candidates are considered. */
722 #define CONSIDER_ALL_CANDIDATES_BOUND \
723 ((unsigned) param_iv_consider_all_candidates_bound)
725 /* If there are more iv occurrences, we just give up (it is quite unlikely that
726 optimizing such a loop would help, and it would take ages). */
728 #define MAX_CONSIDERED_GROUPS \
729 ((unsigned) param_iv_max_considered_uses)
731 /* If there are at most this number of ivs in the set, always try removing
732 unnecessary ivs from the set. */
734 #define ALWAYS_PRUNE_CAND_SET_BOUND \
735 ((unsigned) param_iv_always_prune_cand_set_bound)
737 /* The list of trees for which the decl_rtl field must be reset is stored
738 here. */
740 static vec<tree> decl_rtl_to_reset;
742 static comp_cost force_expr_to_var_cost (tree, bool);
744 /* The single loop exit if it dominates the latch, NULL otherwise. */
746 edge
747 single_dom_exit (class loop *loop)
749 edge exit = single_exit (loop);
751 if (!exit)
752 return NULL;
754 if (!just_once_each_iteration_p (loop, exit->src))
755 return NULL;
757 return exit;
760 /* Dumps information about the induction variable IV to FILE. Don't dump
761 variable's name if DUMP_NAME is FALSE. The information is dumped with
762 preceding spaces indicated by INDENT_LEVEL. */
764 void
765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 const char *p;
768 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 if (indent_level > 4)
771 indent_level = 4;
772 p = spaces + 8 - (indent_level << 1);
774 fprintf (file, "%sIV struct:\n", p);
775 if (iv->ssa_name && dump_name)
777 fprintf (file, "%s SSA_NAME:\t", p);
778 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779 fprintf (file, "\n");
782 fprintf (file, "%s Type:\t", p);
783 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784 fprintf (file, "\n");
786 fprintf (file, "%s Base:\t", p);
787 print_generic_expr (file, iv->base, TDF_SLIM);
788 fprintf (file, "\n");
790 fprintf (file, "%s Step:\t", p);
791 print_generic_expr (file, iv->step, TDF_SLIM);
792 fprintf (file, "\n");
794 if (iv->base_object)
796 fprintf (file, "%s Object:\t", p);
797 print_generic_expr (file, iv->base_object, TDF_SLIM);
798 fprintf (file, "\n");
801 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
804 p, iv->no_overflow ? "No-overflow" : "Overflow");
807 /* Dumps information about the USE to FILE. */
809 void
810 dump_use (FILE *file, struct iv_use *use)
812 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
813 fprintf (file, " At stmt:\t");
814 print_gimple_stmt (file, use->stmt, 0);
815 fprintf (file, " At pos:\t");
816 if (use->op_p)
817 print_generic_expr (file, *use->op_p, TDF_SLIM);
818 fprintf (file, "\n");
819 dump_iv (file, use->iv, false, 2);
822 /* Dumps information about the uses to FILE. */
824 void
825 dump_groups (FILE *file, struct ivopts_data *data)
827 unsigned i, j;
828 struct iv_group *group;
830 for (i = 0; i < data->vgroups.length (); i++)
832 group = data->vgroups[i];
833 fprintf (file, "Group %d:\n", group->id);
834 if (group->type == USE_NONLINEAR_EXPR)
835 fprintf (file, " Type:\tGENERIC\n");
836 else if (group->type == USE_REF_ADDRESS)
837 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
838 else if (group->type == USE_PTR_ADDRESS)
839 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
840 else
842 gcc_assert (group->type == USE_COMPARE);
843 fprintf (file, " Type:\tCOMPARE\n");
845 for (j = 0; j < group->vuses.length (); j++)
846 dump_use (file, group->vuses[j]);
850 /* Dumps information about induction variable candidate CAND to FILE. */
852 void
853 dump_cand (FILE *file, struct iv_cand *cand)
855 struct iv *iv = cand->iv;
857 fprintf (file, "Candidate %d:\n", cand->id);
858 if (cand->inv_vars)
860 fprintf (file, " Depend on inv.vars: ");
861 dump_bitmap (file, cand->inv_vars);
863 if (cand->inv_exprs)
865 fprintf (file, " Depend on inv.exprs: ");
866 dump_bitmap (file, cand->inv_exprs);
869 if (cand->var_before)
871 fprintf (file, " Var befor: ");
872 print_generic_expr (file, cand->var_before, TDF_SLIM);
873 fprintf (file, "\n");
875 if (cand->var_after)
877 fprintf (file, " Var after: ");
878 print_generic_expr (file, cand->var_after, TDF_SLIM);
879 fprintf (file, "\n");
882 switch (cand->pos)
884 case IP_NORMAL:
885 fprintf (file, " Incr POS: before exit test\n");
886 break;
888 case IP_BEFORE_USE:
889 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
890 break;
892 case IP_AFTER_USE:
893 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
894 break;
896 case IP_END:
897 fprintf (file, " Incr POS: at end\n");
898 break;
900 case IP_ORIGINAL:
901 fprintf (file, " Incr POS: orig biv\n");
902 break;
905 dump_iv (file, iv, false, 1);
908 /* Returns the info for ssa version VER. */
910 static inline struct version_info *
911 ver_info (struct ivopts_data *data, unsigned ver)
913 return data->version_info + ver;
916 /* Returns the info for ssa name NAME. */
918 static inline struct version_info *
919 name_info (struct ivopts_data *data, tree name)
921 return ver_info (data, SSA_NAME_VERSION (name));
924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925 emitted in LOOP. */
927 static bool
928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 gcc_assert (bb);
934 if (sbb == loop->latch)
935 return true;
937 if (sbb != bb)
938 return false;
940 return stmt == last_nondebug_stmt (bb);
943 /* Returns true if STMT is after the place where the original induction
944 variable CAND is incremented. If TRUE_IF_EQUAL is set, we also return true
945 if the positions are identical. */
947 static bool
948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 basic_block cand_bb = gimple_bb (cand->incremented_at);
951 basic_block stmt_bb = gimple_bb (stmt);
953 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954 return false;
956 if (stmt_bb != cand_bb)
957 return true;
959 if (true_if_equal
960 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
961 return true;
962 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
965 /* Returns true if STMT is after the place where the induction variable
966 CAND is incremented in LOOP. */
968 static bool
969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 switch (cand->pos)
973 case IP_END:
974 return false;
976 case IP_NORMAL:
977 return stmt_after_ip_normal_pos (loop, stmt);
979 case IP_ORIGINAL:
980 case IP_AFTER_USE:
981 return stmt_after_inc_pos (cand, stmt, false);
983 case IP_BEFORE_USE:
984 return stmt_after_inc_pos (cand, stmt, true);
986 default:
987 gcc_unreachable ();
991 /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 static tree
994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 if (TREE_CODE (*tp) == SSA_NAME
997 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998 return *tp;
1000 if (!EXPR_P (*tp))
1001 *walk_subtrees = 0;
1003 return NULL_TREE;
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007 abnormal phi node. */
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1012 return walk_tree_without_duplicates
1013 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1016 /* Returns the structure describing number of iterations determined from
1017 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1022 class tree_niter_desc *desc;
1023 tree_niter_desc **slot;
1025 if (!data->niters)
1027 data->niters = new hash_map<edge, tree_niter_desc *>;
1028 slot = NULL;
1030 else
1031 slot = data->niters->get (exit);
1033 if (!slot)
1035 /* Try to determine number of iterations. We cannot safely work with ssa
1036 names that appear in phi nodes on abnormal edges, so that we do not
1037 create overlapping life ranges for them (PR 27283). */
1038 desc = XNEW (class tree_niter_desc);
1039 ::new (static_cast<void*> (desc)) tree_niter_desc ();
1040 if (!number_of_iterations_exit (data->current_loop,
1041 exit, desc, true)
1042 || contains_abnormal_ssa_name_p (desc->niter))
1044 desc->~tree_niter_desc ();
1045 XDELETE (desc);
1046 desc = NULL;
1048 data->niters->put (exit, desc);
1050 else
1051 desc = *slot;
1053 return desc;
1056 /* Returns the structure describing number of iterations determined from
1057 single dominating exit of DATA->current_loop, or NULL if something
1058 goes wrong. */
1060 static class tree_niter_desc *
1061 niter_for_single_dom_exit (struct ivopts_data *data)
1063 edge exit = single_dom_exit (data->current_loop);
1065 if (!exit)
1066 return NULL;
1068 return niter_for_exit (data, exit);
1071 /* Initializes data structures used by the iv optimization pass, stored
1072 in DATA. */
1074 static void
1075 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1077 data->version_info_size = 2 * num_ssa_names;
1078 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1079 data->relevant = BITMAP_ALLOC (NULL);
1080 data->important_candidates = BITMAP_ALLOC (NULL);
1081 data->max_inv_var_id = 0;
1082 data->max_inv_expr_id = 0;
1083 data->niters = NULL;
1084 data->vgroups.create (20);
1085 data->vcands.create (20);
1086 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1087 data->name_expansion_cache = NULL;
1088 data->base_object_map = NULL;
1089 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1090 data->iv_common_cands.create (20);
1091 decl_rtl_to_reset.create (20);
1092 gcc_obstack_init (&data->iv_obstack);
1095 /* walk_tree callback for determine_base_object. */
1097 static tree
1098 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1100 tree_code code = TREE_CODE (*tp);
1101 tree obj = NULL_TREE;
1102 if (code == ADDR_EXPR)
1104 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1105 if (!base)
1106 obj = *tp;
1107 else if (TREE_CODE (base) != MEM_REF)
1108 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1110 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1111 obj = fold_convert (ptr_type_node, *tp);
1113 if (!obj)
1115 if (!EXPR_P (*tp))
1116 *walk_subtrees = 0;
1118 return NULL_TREE;
1120 /* Record special node for multiple base objects and stop. */
1121 if (*static_cast<tree *> (wdata))
1123 *static_cast<tree *> (wdata) = integer_zero_node;
1124 return integer_zero_node;
1126 /* Record the base object and continue looking. */
1127 *static_cast<tree *> (wdata) = obj;
1128 return NULL_TREE;
1131 /* Returns a memory object to which EXPR points, with caching. Return NULL if
1132 we are able to determine that it does not point to any such object; specially
1133 return integer_zero_node if EXPR contains multiple base objects. */
1135 static tree
1136 determine_base_object (struct ivopts_data *data, tree expr)
1138 tree *slot, obj = NULL_TREE;
1139 if (data->base_object_map)
1141 if ((slot = data->base_object_map->get(expr)) != NULL)
1142 return *slot;
1144 else
1145 data->base_object_map = new hash_map<tree, tree>;
1147 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1148 data->base_object_map->put (expr, obj);
1149 return obj;
1152 /* Return true if address expression with non-DECL_P operand appears
1153 in EXPR. */
1155 static bool
1156 contain_complex_addr_expr (tree expr)
1158 bool res = false;
1160 STRIP_NOPS (expr);
1161 switch (TREE_CODE (expr))
1163 case POINTER_PLUS_EXPR:
1164 case PLUS_EXPR:
1165 case MINUS_EXPR:
1166 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1167 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1168 break;
1170 case ADDR_EXPR:
1171 return (!DECL_P (TREE_OPERAND (expr, 0)));
1173 default:
1174 return false;
1177 return res;
1180 /* Allocates an induction variable with given initial value BASE and step STEP
1181 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1183 static struct iv *
1184 alloc_iv (struct ivopts_data *data, tree base, tree step,
1185 bool no_overflow = false)
1187 tree expr = base;
1188 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1189 sizeof (struct iv));
1190 gcc_assert (step != NULL_TREE);
1192 /* Canonicalize the address expression in BASE as if it were an unsigned
1193 computation. That leads to more equalities being detected and results in:
1195 1) More accurate cost can be computed for address expressions;
1196 2) Duplicate candidates won't be created for bases in different
1197 forms, like &a[0] and &a.
1198 3) Duplicate candidates won't be created for IV expressions that differ
1199 only in their sign. */
1200 aff_tree comb;
1201 STRIP_NOPS (expr);
1202 expr = fold_convert (unsigned_type_for (TREE_TYPE (expr)), expr);
1203 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1204 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1206 iv->base = base;
1207 iv->base_object = determine_base_object (data, base);
1208 iv->step = step;
1209 iv->biv_p = false;
1210 iv->nonlin_use = NULL;
1211 iv->ssa_name = NULL_TREE;
1212 if (!no_overflow
1213 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1214 base, step))
1215 no_overflow = true;
1216 iv->no_overflow = no_overflow;
1217 iv->have_address_use = false;
1219 return iv;
1222 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1223 doesn't overflow. */
1225 static void
1226 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1227 bool no_overflow)
1229 struct version_info *info = name_info (data, iv);
1231 gcc_assert (!info->iv);
1233 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1234 info->iv = alloc_iv (data, base, step, no_overflow);
1235 info->iv->ssa_name = iv;
1238 /* Finds induction variable declaration for VAR. */
1240 static struct iv *
1241 get_iv (struct ivopts_data *data, tree var)
1243 basic_block bb;
1244 tree type = TREE_TYPE (var);
1246 if (!POINTER_TYPE_P (type)
1247 && !INTEGRAL_TYPE_P (type))
1248 return NULL;
1250 if (!name_info (data, var)->iv)
1252 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1254 if (!bb
1255 || !flow_bb_inside_loop_p (data->current_loop, bb))
1257 if (POINTER_TYPE_P (type))
1258 type = sizetype;
1259 set_iv (data, var, var, build_int_cst (type, 0), true);
1263 return name_info (data, var)->iv;
1266 /* Return the first non-invariant ssa var found in EXPR. */
1268 static tree
1269 extract_single_var_from_expr (tree expr)
1271 int i, n;
1272 tree tmp;
1273 enum tree_code code;
1275 if (!expr || is_gimple_min_invariant (expr))
1276 return NULL;
1278 code = TREE_CODE (expr);
1279 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1281 n = TREE_OPERAND_LENGTH (expr);
1282 for (i = 0; i < n; i++)
1284 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1286 if (tmp)
1287 return tmp;
1290 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
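/* In the terminology used below, a basic iv (biv) is one defined by a PHI
   node in the loop header, e.g. the counter i of "for (i = 0; i < n; i++)",
   while a general iv (giv) is derived from a biv by a statement inside the
   loop, e.g. an address computed as base + 4 * i.  */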
1293 /* Finds basic ivs. */
1295 static bool
1296 find_bivs (struct ivopts_data *data)
1298 gphi *phi;
1299 affine_iv iv;
1300 tree step, type, base, stop;
1301 bool found = false;
1302 class loop *loop = data->current_loop;
1303 gphi_iterator psi;
1305 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1307 phi = psi.phi ();
1309 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1310 continue;
1312 if (virtual_operand_p (PHI_RESULT (phi)))
1313 continue;
1315 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1316 continue;
1318 if (integer_zerop (iv.step))
1319 continue;
1321 step = iv.step;
1322 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1323 /* Stop expanding the iv base at the first ssa var referred to by the iv
1324 step. Ideally we would stop at any ssa var, but since that is expensive
1325 and such cases are unusual, we just do it for the first one.
1327 See PR64705 for the rationale. */
1328 stop = extract_single_var_from_expr (step);
1329 base = expand_simple_operations (base, stop);
1330 if (contains_abnormal_ssa_name_p (base)
1331 || contains_abnormal_ssa_name_p (step))
1332 continue;
1334 type = TREE_TYPE (PHI_RESULT (phi));
1335 base = fold_convert (type, base);
1336 if (step)
1338 if (POINTER_TYPE_P (type))
1339 step = convert_to_ptrofftype (step);
1340 else
1341 step = fold_convert (type, step);
1344 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1345 found = true;
1348 return found;
1351 /* Marks basic ivs. */
1353 static void
1354 mark_bivs (struct ivopts_data *data)
1356 gphi *phi;
1357 gimple *def;
1358 tree var;
1359 struct iv *iv, *incr_iv;
1360 class loop *loop = data->current_loop;
1361 basic_block incr_bb;
1362 gphi_iterator psi;
1364 data->bivs_not_used_in_addr = 0;
1365 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1367 phi = psi.phi ();
1369 iv = get_iv (data, PHI_RESULT (phi));
1370 if (!iv)
1371 continue;
1373 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1374 def = SSA_NAME_DEF_STMT (var);
1375 /* Don't mark iv peeled from other one as biv. */
1376 if (def
1377 && gimple_code (def) == GIMPLE_PHI
1378 && gimple_bb (def) == loop->header)
1379 continue;
1381 incr_iv = get_iv (data, var);
1382 if (!incr_iv)
1383 continue;
1385 /* If the increment is in the subloop, ignore it. */
1386 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1387 if (incr_bb->loop_father != data->current_loop
1388 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1389 continue;
1391 iv->biv_p = true;
1392 incr_iv->biv_p = true;
1393 if (iv->no_overflow)
1394 data->bivs_not_used_in_addr++;
1395 if (incr_iv->no_overflow)
1396 data->bivs_not_used_in_addr++;
1400 /* Checks whether STMT defines a linear induction variable and stores its
1401 parameters to IV. */
1403 static bool
1404 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1406 tree lhs, stop;
1407 class loop *loop = data->current_loop;
1409 iv->base = NULL_TREE;
1410 iv->step = NULL_TREE;
1412 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1413 return false;
1415 lhs = gimple_assign_lhs (stmt);
1416 if (TREE_CODE (lhs) != SSA_NAME)
1417 return false;
1419 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1420 return false;
1422 /* Stop expanding the iv base at the first ssa var referred to by the iv
1423 step. Ideally we would stop at any ssa var, but since that is expensive
1424 and such cases are unusual, we just do it for the first one.
1426 See PR64705 for the rationale. */
1427 stop = extract_single_var_from_expr (iv->step);
1428 iv->base = expand_simple_operations (iv->base, stop);
1429 if (contains_abnormal_ssa_name_p (iv->base)
1430 || contains_abnormal_ssa_name_p (iv->step))
1431 return false;
1433 /* If STMT could throw, then do not consider STMT as defining a GIV.
1434 While this will suppress optimizations, we cannot safely delete this
1435 GIV and associated statements, even if it appears it is not used. */
1436 if (stmt_could_throw_p (cfun, stmt))
1437 return false;
1439 return true;
1442 /* Finds general ivs in statement STMT. */
1444 static void
1445 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1447 affine_iv iv;
1449 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1450 return;
1452 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1455 /* Finds general ivs in basic block BB. */
1457 static void
1458 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1460 gimple_stmt_iterator bsi;
1462 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1463 if (!is_gimple_debug (gsi_stmt (bsi)))
1464 find_givs_in_stmt (data, gsi_stmt (bsi));
1467 /* Finds general ivs. */
1469 static void
1470 find_givs (struct ivopts_data *data, basic_block *body)
1472 class loop *loop = data->current_loop;
1473 unsigned i;
1475 for (i = 0; i < loop->num_nodes; i++)
1476 find_givs_in_bb (data, body[i]);
1479 /* For each ssa name defined in LOOP determines whether it is an induction
1480 variable and if so, its initial value and step. */
1482 static bool
1483 find_induction_variables (struct ivopts_data *data, basic_block *body)
1485 unsigned i;
1486 bitmap_iterator bi;
1488 if (!find_bivs (data))
1489 return false;
1491 find_givs (data, body);
1492 mark_bivs (data);
1494 if (dump_file && (dump_flags & TDF_DETAILS))
1496 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1498 if (niter)
1500 fprintf (dump_file, " number of iterations ");
1501 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1502 if (!integer_zerop (niter->may_be_zero))
1504 fprintf (dump_file, "; zero if ");
1505 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1507 fprintf (dump_file, "\n");
1510 fprintf (dump_file, "\n<Induction Vars>:\n");
1511 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1513 struct version_info *info = ver_info (data, i);
1514 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1515 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1519 return true;
1522 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1523 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1524 is the const offset stripped from IV base and MEM_TYPE is the type
1525 of the memory being addressed. For uses of other types, ADDR_BASE
1526 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1528 static struct iv_use *
1529 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1530 gimple *stmt, enum use_type type, tree mem_type,
1531 tree addr_base, poly_uint64 addr_offset)
1533 struct iv_use *use = XCNEW (struct iv_use);
1535 use->id = group->vuses.length ();
1536 use->group_id = group->id;
1537 use->type = type;
1538 use->mem_type = mem_type;
1539 use->iv = iv;
1540 use->stmt = stmt;
1541 use->op_p = use_p;
1542 use->addr_base = addr_base;
1543 use->addr_offset = addr_offset;
1545 group->vuses.safe_push (use);
1546 return use;
1549 /* Checks whether OP is a loop-level invariant and if so, records it.
1550 NONLINEAR_USE is true if the invariant is used in a way we do not
1551 handle specially. */
1553 static void
1554 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1556 basic_block bb;
1557 struct version_info *info;
1559 if (TREE_CODE (op) != SSA_NAME
1560 || virtual_operand_p (op))
1561 return;
1563 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1564 if (bb
1565 && flow_bb_inside_loop_p (data->current_loop, bb))
1566 return;
1568 info = name_info (data, op);
1569 info->name = op;
1570 info->has_nonlin_use |= nonlinear_use;
1571 if (!info->inv_id)
1572 info->inv_id = ++data->max_inv_var_id;
1573 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1576 /* Record a group of TYPE. */
1578 static struct iv_group *
1579 record_group (struct ivopts_data *data, enum use_type type)
1581 struct iv_group *group = XCNEW (struct iv_group);
1583 group->id = data->vgroups.length ();
1584 group->type = type;
1585 group->related_cands = BITMAP_ALLOC (NULL);
1586 group->vuses.create (1);
1587 group->doloop_p = false;
1589 data->vgroups.safe_push (group);
1590 return group;
1593 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1594 New group will be created if there is no existing group for the use.
1595 MEM_TYPE is the type of memory being addressed, or NULL if this
1596 isn't an address reference. */
1598 static struct iv_use *
1599 record_group_use (struct ivopts_data *data, tree *use_p,
1600 struct iv *iv, gimple *stmt, enum use_type type,
1601 tree mem_type)
1603 tree addr_base = NULL;
1604 struct iv_group *group = NULL;
1605 poly_uint64 addr_offset = 0;
1607 /* Record non address type use in a new group. */
1608 if (address_p (type))
1610 unsigned int i;
1612 gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1613 tree addr_toffset;
1614 split_constant_offset (iv->base, &addr_base, &addr_toffset);
1615 addr_offset = int_cst_value (addr_toffset);
1616 for (i = 0; i < data->vgroups.length (); i++)
1618 struct iv_use *use;
1620 group = data->vgroups[i];
1621 use = group->vuses[0];
1622 if (!address_p (use->type))
1623 continue;
1625 /* Check if it has the same stripped base and step. */
1626 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1627 && operand_equal_p (iv->step, use->iv->step, OEP_ASSUME_WRAPV)
1628 && operand_equal_p (addr_base, use->addr_base, OEP_ASSUME_WRAPV))
1629 break;
1631 if (i == data->vgroups.length ())
1632 group = NULL;
1635 if (!group)
1636 group = record_group (data, type);
1638 return record_use (group, use_p, iv, stmt, type, mem_type,
1639 addr_base, addr_offset);
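/* For example, address uses of a[i] and a[i + 1] in the same loop normally
   end up in one group: they share the base object, the step and the stripped
   base address, and differ only in the recorded constant addr_offset
   (0 versus sizeof (*a)).  */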
1642 /* Checks whether the use OP is interesting and if so, records it. */
1644 static struct iv_use *
1645 find_interesting_uses_op (struct ivopts_data *data, tree op)
1647 struct iv *iv;
1648 gimple *stmt;
1649 struct iv_use *use;
1651 if (TREE_CODE (op) != SSA_NAME)
1652 return NULL;
1654 iv = get_iv (data, op);
1655 if (!iv)
1656 return NULL;
1658 if (iv->nonlin_use)
1660 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1661 return iv->nonlin_use;
1664 if (integer_zerop (iv->step))
1666 record_invariant (data, op, true);
1667 return NULL;
1670 stmt = SSA_NAME_DEF_STMT (op);
1671 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1673 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1674 iv->nonlin_use = use;
1675 return use;
1678 /* Indicate how compare type iv_use can be handled. */
1679 enum comp_iv_rewrite
1681 COMP_IV_NA,
1682 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1683 COMP_IV_EXPR,
1684 /* We may rewrite compare type iv_uses on both sides of comparison by
1685 expressing value of each iv_use. */
1686 COMP_IV_EXPR_2,
1687 /* We may rewrite compare type iv_use by expressing value of the iv_use
1688 or by eliminating it with other iv_cand. */
1689 COMP_IV_ELIM
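/* For instance, in "if (i < n)" with i an iv and n loop invariant, the use
   of i is classified as COMP_IV_ELIM, while "if (p < q)" with both p and q
   being ivs yields COMP_IV_EXPR_2.  */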
1692 /* Given a condition in statement STMT, checks whether it is a compare
1693 of an induction variable and an invariant. If this is the case,
1694 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1695 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1696 induction variable descriptions, and the way the condition can be
1697 rewritten is returned. Otherwise CONTROL_VAR and BOUND are set to the
1698 arguments of the condition and COMP_IV_NA is returned. */
1700 static enum comp_iv_rewrite
1701 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1702 tree **control_var, tree **bound,
1703 struct iv **iv_var, struct iv **iv_bound)
1705 /* The objects returned when COND has constant operands. */
1706 static struct iv const_iv;
1707 static tree zero;
1708 tree *op0 = &zero, *op1 = &zero;
1709 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1710 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1712 if (gimple_code (stmt) == GIMPLE_COND)
1714 gcond *cond_stmt = as_a <gcond *> (stmt);
1715 op0 = gimple_cond_lhs_ptr (cond_stmt);
1716 op1 = gimple_cond_rhs_ptr (cond_stmt);
1718 else
1720 op0 = gimple_assign_rhs1_ptr (stmt);
1721 op1 = gimple_assign_rhs2_ptr (stmt);
1724 zero = integer_zero_node;
1725 const_iv.step = integer_zero_node;
1727 if (TREE_CODE (*op0) == SSA_NAME)
1728 iv0 = get_iv (data, *op0);
1729 if (TREE_CODE (*op1) == SSA_NAME)
1730 iv1 = get_iv (data, *op1);
1732 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1733 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1735 rewrite_type = COMP_IV_EXPR_2;
1736 goto end;
1739 /* If neither side of the comparison is an IV. */
1740 if ((!iv0 || integer_zerop (iv0->step))
1741 && (!iv1 || integer_zerop (iv1->step)))
1742 goto end;
1744 /* Control variable may be on the other side. */
1745 if (!iv0 || integer_zerop (iv0->step))
1747 std::swap (op0, op1);
1748 std::swap (iv0, iv1);
1750 /* If one side is IV and the other side isn't loop invariant. */
1751 if (!iv1)
1752 rewrite_type = COMP_IV_EXPR;
1753 /* If one side is IV and the other side is loop invariant. */
1754 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1755 rewrite_type = COMP_IV_ELIM;
1757 end:
1758 if (control_var)
1759 *control_var = op0;
1760 if (iv_var)
1761 *iv_var = iv0;
1762 if (bound)
1763 *bound = op1;
1764 if (iv_bound)
1765 *iv_bound = iv1;
1767 return rewrite_type;
1770 /* Checks whether the condition in STMT is interesting and if so,
1771 records it. */
1773 static void
1774 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1776 tree *var_p, *bound_p;
1777 struct iv *var_iv, *bound_iv;
1778 enum comp_iv_rewrite ret;
1780 ret = extract_cond_operands (data, stmt,
1781 &var_p, &bound_p, &var_iv, &bound_iv);
1782 if (ret == COMP_IV_NA)
1784 find_interesting_uses_op (data, *var_p);
1785 find_interesting_uses_op (data, *bound_p);
1786 return;
1789 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1790 /* Record compare type iv_use for iv on the other side of comparison. */
1791 if (ret == COMP_IV_EXPR_2)
1792 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1795 /* Returns the outermost loop in which EXPR is obviously invariant,
1796 relative to the loop LOOP, i.e. the outermost loop such that all of
1797 EXPR's operands are defined outside of it. Returns NULL if EXPR is
1798 not even obviously invariant in LOOP. */
1800 class loop *
1801 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1803 basic_block def_bb;
1804 unsigned i, len;
1806 if (is_gimple_min_invariant (expr))
1807 return current_loops->tree_root;
1809 if (TREE_CODE (expr) == SSA_NAME)
1811 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1812 if (def_bb)
1814 if (flow_bb_inside_loop_p (loop, def_bb))
1815 return NULL;
1816 return superloop_at_depth (loop,
1817 loop_depth (def_bb->loop_father) + 1);
1820 return current_loops->tree_root;
1823 if (!EXPR_P (expr))
1824 return NULL;
1826 unsigned maxdepth = 0;
1827 len = TREE_OPERAND_LENGTH (expr);
1828 for (i = 0; i < len; i++)
1830 class loop *ivloop;
1831 if (!TREE_OPERAND (expr, i))
1832 continue;
1834 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1835 if (!ivloop)
1836 return NULL;
1837 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1840 return superloop_at_depth (loop, maxdepth);
1843 /* Returns true if expression EXPR is obviously invariant in LOOP,
1844 i.e. if all its operands are defined outside of the LOOP. LOOP
1845 should not be the function body. */
1847 bool
1848 expr_invariant_in_loop_p (class loop *loop, tree expr)
1850 basic_block def_bb;
1851 unsigned i, len;
1853 gcc_assert (loop_depth (loop) > 0);
1855 if (is_gimple_min_invariant (expr))
1856 return true;
1858 if (TREE_CODE (expr) == SSA_NAME)
1860 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1861 if (def_bb
1862 && flow_bb_inside_loop_p (loop, def_bb))
1863 return false;
1865 return true;
1868 if (!EXPR_P (expr))
1869 return false;
1871 len = TREE_OPERAND_LENGTH (expr);
1872 for (i = 0; i < len; i++)
1873 if (TREE_OPERAND (expr, i)
1874 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1875 return false;
1877 return true;
1880 /* Given expression EXPR which computes inductive values with respect
1881 to loop recorded in DATA, this function returns biv from which EXPR
1882 is derived by tracing definition chains of ssa variables in EXPR. */
1884 static struct iv*
1885 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1887 struct iv *iv;
1888 unsigned i, n;
1889 tree e2, e1;
1890 enum tree_code code;
1891 gimple *stmt;
1893 if (expr == NULL_TREE)
1894 return NULL;
1896 if (is_gimple_min_invariant (expr))
1897 return NULL;
1899 code = TREE_CODE (expr);
1900 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1902 n = TREE_OPERAND_LENGTH (expr);
1903 for (i = 0; i < n; i++)
1905 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1906 if (iv)
1907 return iv;
1911 /* Stop if it's not ssa name. */
1912 if (code != SSA_NAME)
1913 return NULL;
1915 iv = get_iv (data, expr);
1916 if (!iv || integer_zerop (iv->step))
1917 return NULL;
1918 else if (iv->biv_p)
1919 return iv;
1921 stmt = SSA_NAME_DEF_STMT (expr);
1922 if (gphi *phi = dyn_cast <gphi *> (stmt))
1924 ssa_op_iter iter;
1925 use_operand_p use_p;
1926 basic_block phi_bb = gimple_bb (phi);
1928 /* Skip loop header PHI that doesn't define biv. */
1929 if (phi_bb->loop_father == data->current_loop)
1930 return NULL;
1932 if (virtual_operand_p (gimple_phi_result (phi)))
1933 return NULL;
1935 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1937 tree use = USE_FROM_PTR (use_p);
1938 iv = find_deriving_biv_for_expr (data, use);
1939 if (iv)
1940 return iv;
1942 return NULL;
1944 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1945 return NULL;
1947 e1 = gimple_assign_rhs1 (stmt);
1948 code = gimple_assign_rhs_code (stmt);
1949 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1950 return find_deriving_biv_for_expr (data, e1);
1952 switch (code)
1954 case MULT_EXPR:
1955 case PLUS_EXPR:
1956 case MINUS_EXPR:
1957 case POINTER_PLUS_EXPR:
1958 /* Increments, decrements and multiplications by a constant
1959 are simple. */
1960 e2 = gimple_assign_rhs2 (stmt);
1961 iv = find_deriving_biv_for_expr (data, e2);
1962 if (iv)
1963 return iv;
1964 gcc_fallthrough ();
1966 CASE_CONVERT:
1967 /* Casts are simple. */
1968 return find_deriving_biv_for_expr (data, e1);
1970 default:
1971 break;
1974 return NULL;
1977 /* Record that BIV and its predecessor and successor ivs are used in
1978 address type uses. */
1980 static void
1981 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1983 unsigned i;
1984 tree type, base_1, base_2;
1985 bitmap_iterator bi;
1987 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1988 || biv->have_address_use || !biv->no_overflow)
1989 return;
1991 type = TREE_TYPE (biv->base);
1992 if (!INTEGRAL_TYPE_P (type))
1993 return;
1995 biv->have_address_use = true;
1996 data->bivs_not_used_in_addr--;
1997 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1998 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2000 struct iv *iv = ver_info (data, i)->iv;
2002 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2003 || iv->have_address_use || !iv->no_overflow)
2004 continue;
2006 if (type != TREE_TYPE (iv->base)
2007 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2008 continue;
2010 if (!operand_equal_p (biv->step, iv->step, 0))
2011 continue;
2013 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2014 if (operand_equal_p (base_1, iv->base, 0)
2015 || operand_equal_p (base_2, biv->base, 0))
2017 iv->have_address_use = true;
2018 data->bivs_not_used_in_addr--;
2023 /* Accumulates the steps of indices into DATA and replaces their values with
2024 the initial ones. Returns false when the value of the index cannot be
2025 determined. Callback for for_each_index. */
2027 struct ifs_ivopts_data
2029 struct ivopts_data *ivopts_data;
2030 gimple *stmt;
2031 tree step;
2034 static bool
2035 idx_find_step (tree base, tree *idx, void *data)
2037 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2038 struct iv *iv;
2039 bool use_overflow_semantics = false;
2040 tree step, iv_base, iv_step, lbound, off;
2041 class loop *loop = dta->ivopts_data->current_loop;
2043 /* If base is a component ref, require that the offset of the reference
2044 be invariant. */
2045 if (TREE_CODE (base) == COMPONENT_REF)
2047 off = component_ref_field_offset (base);
2048 return expr_invariant_in_loop_p (loop, off);
2051 /* If base is array, first check whether we will be able to move the
2052 reference out of the loop (in order to take its address in strength
2053 reduction). In order for this to work we need both lower bound
2054 and step to be loop invariants. */
2055 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2057 /* Moreover, for a range, the size needs to be invariant as well. */
2058 if (TREE_CODE (base) == ARRAY_RANGE_REF
2059 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2060 return false;
2062 step = array_ref_element_size (base);
2063 lbound = array_ref_low_bound (base);
2065 if (!expr_invariant_in_loop_p (loop, step)
2066 || !expr_invariant_in_loop_p (loop, lbound))
2067 return false;
2070 if (TREE_CODE (*idx) != SSA_NAME)
2071 return true;
2073 iv = get_iv (dta->ivopts_data, *idx);
2074 if (!iv)
2075 return false;
2077 /* XXX For a base of *D42 with iv->base being &x[0] we produce
2078 *&x[0], which is not folded and does not trigger the
2079 ARRAY_REF path below. */
2080 *idx = iv->base;
2082 if (integer_zerop (iv->step))
2083 return true;
2085 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2087 step = array_ref_element_size (base);
2089 /* We only handle addresses whose step is an integer constant. */
2090 if (TREE_CODE (step) != INTEGER_CST)
2091 return false;
2093 else
2094 /* The step for pointer arithmetic is already 1 byte. */
2095 step = size_one_node;
2097 iv_base = iv->base;
2098 iv_step = iv->step;
2099 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2100 use_overflow_semantics = true;
2102 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2103 sizetype, &iv_base, &iv_step, dta->stmt,
2104 use_overflow_semantics))
2106 /* The index might wrap. */
2107 return false;
2110 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2111 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2113 if (dta->ivopts_data->bivs_not_used_in_addr)
2115 if (!iv->biv_p)
2116 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2118 record_biv_for_address_use (dta->ivopts_data, iv);
2120 return true;
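/* A hypothetical example of the accumulation above: for a reference a[i]
   with 'int a[100]' and an iv i of step 1, the element size is 4 bytes, so
   4 is added to DTA->step; for a[i][j] with 'int a[100][100]' and both
   indices of step 1, the two calls add 400 and 4 for a total step of 404.  */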
2123 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2124 object is passed to it in DATA. */
2126 static bool
2127 idx_record_use (tree base, tree *idx,
2128 void *vdata)
2130 struct ivopts_data *data = (struct ivopts_data *) vdata;
2131 find_interesting_uses_op (data, *idx);
2132 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2134 if (TREE_OPERAND (base, 2))
2135 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2136 if (TREE_OPERAND (base, 3))
2137 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2139 return true;
2142 /* If we can prove that TOP = cst * BOT for some constant cst,
2143 store cst to MUL and return true. Otherwise return false.
2144 The returned value is always sign-extended, regardless of the
2145 signedness of TOP and BOT. */
2147 static bool
2148 constant_multiple_of (tree top, tree bot, widest_int *mul)
2150 aff_tree aff_top, aff_bot;
2151 tree_to_aff_combination (top, TREE_TYPE (top), &aff_top);
2152 tree_to_aff_combination (bot, TREE_TYPE (bot), &aff_bot);
2153 poly_widest_int poly_mul;
2154 if (aff_combination_constant_multiple_p (&aff_top, &aff_bot, &poly_mul)
2155 && poly_mul.is_constant (mul))
2156 return true;
2158 return false;
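/* Hypothetical operands for illustration: with TOP = 8 * n and BOT = 2 * n,
   *MUL is set to 4 and true is returned; with TOP = 8 * n + 1 and
   BOT = 2 * n there is no constant multiple, so false is returned.  */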
2161 /* Return true if memory reference REF with step STEP may be unaligned. */
2163 static bool
2164 may_be_unaligned_p (tree ref, tree step)
2166 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2167 thus they are not misaligned. */
2168 if (TREE_CODE (ref) == TARGET_MEM_REF)
2169 return false;
2171 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2172 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2173 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2175 unsigned HOST_WIDE_INT bitpos;
2176 unsigned int ref_align;
2177 get_object_alignment_1 (ref, &ref_align, &bitpos);
2178 if (ref_align < align
2179 || (bitpos % align) != 0
2180 || (bitpos % BITS_PER_UNIT) != 0)
2181 return true;
2183 unsigned int trailing_zeros = tree_ctz (step);
2184 if (trailing_zeros < HOST_BITS_PER_INT
2185 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2186 return true;
2188 return false;
2191 /* Return true if EXPR may be non-addressable. */
2193 bool
2194 may_be_nonaddressable_p (tree expr)
2196 switch (TREE_CODE (expr))
2198 case VAR_DECL:
2199 /* Check if it's a register variable. */
2200 return DECL_HARD_REGISTER (expr);
2202 case TARGET_MEM_REF:
2203 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2204 target, thus they are always addressable. */
2205 return false;
2207 case MEM_REF:
2208 /* Likewise for MEM_REFs, modulo the storage order. */
2209 return REF_REVERSE_STORAGE_ORDER (expr);
2211 case BIT_FIELD_REF:
2212 if (REF_REVERSE_STORAGE_ORDER (expr))
2213 return true;
2214 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2216 case COMPONENT_REF:
2217 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2218 return true;
2219 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2220 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2222 case ARRAY_REF:
2223 case ARRAY_RANGE_REF:
2224 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2225 return true;
2226 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2228 case VIEW_CONVERT_EXPR:
2229 /* This kind of view-conversion may wrap non-addressable objects
2230 and make them look addressable. After some processing the
2231 non-addressability may be uncovered again, causing ADDR_EXPRs
2232 of inappropriate objects to be built. */
2233 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2234 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2235 return true;
2236 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2238 CASE_CONVERT:
2239 return true;
2241 default:
2242 break;
2245 return false;
2248 /* Finds addresses in *OP_P inside STMT. */
2250 static void
2251 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2252 tree *op_p)
2254 tree base = *op_p, step = size_zero_node;
2255 struct iv *civ;
2256 struct ifs_ivopts_data ifs_ivopts_data;
2258 /* Do not play with volatile memory references. A bit too conservative,
2259 perhaps, but safe. */
2260 if (gimple_has_volatile_ops (stmt))
2261 goto fail;
2263 /* Ignore bitfields for now. Not really something terribly complicated
2264 to handle. TODO. */
2265 if (TREE_CODE (base) == BIT_FIELD_REF)
2266 goto fail;
2268 base = unshare_expr (base);
2270 if (TREE_CODE (base) == TARGET_MEM_REF)
2272 tree type = build_pointer_type (TREE_TYPE (base));
2273 tree astep;
2275 if (TMR_BASE (base)
2276 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2278 civ = get_iv (data, TMR_BASE (base));
2279 if (!civ)
2280 goto fail;
2282 TMR_BASE (base) = civ->base;
2283 step = civ->step;
2285 if (TMR_INDEX2 (base)
2286 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2288 civ = get_iv (data, TMR_INDEX2 (base));
2289 if (!civ)
2290 goto fail;
2292 TMR_INDEX2 (base) = civ->base;
2293 step = civ->step;
2295 if (TMR_INDEX (base)
2296 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2298 civ = get_iv (data, TMR_INDEX (base));
2299 if (!civ)
2300 goto fail;
2302 TMR_INDEX (base) = civ->base;
2303 astep = civ->step;
2305 if (astep)
2307 if (TMR_STEP (base))
2308 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2310 step = fold_build2 (PLUS_EXPR, type, step, astep);
2314 if (integer_zerop (step))
2315 goto fail;
2316 base = tree_mem_ref_addr (type, base);
2318 else
2320 ifs_ivopts_data.ivopts_data = data;
2321 ifs_ivopts_data.stmt = stmt;
2322 ifs_ivopts_data.step = size_zero_node;
2323 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2324 || integer_zerop (ifs_ivopts_data.step))
2325 goto fail;
2326 step = ifs_ivopts_data.step;
2328 /* Check that the base expression is addressable. This needs
2329 to be done after substituting bases of IVs into it. */
2330 if (may_be_nonaddressable_p (base))
2331 goto fail;
2333 /* Moreover, on strict alignment platforms, check that it is
2334 sufficiently aligned. */
2335 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2336 goto fail;
2338 base = build_fold_addr_expr (base);
2340 /* Substituting bases of IVs into the base expression might
2341 have caused folding opportunities. */
2342 if (TREE_CODE (base) == ADDR_EXPR)
2344 tree *ref = &TREE_OPERAND (base, 0);
2345 while (handled_component_p (*ref))
2346 ref = &TREE_OPERAND (*ref, 0);
2347 if (TREE_CODE (*ref) == MEM_REF)
2349 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2350 TREE_OPERAND (*ref, 0),
2351 TREE_OPERAND (*ref, 1));
2352 if (tem)
2353 *ref = tem;
2358 civ = alloc_iv (data, base, step);
2359 /* Fail if base object of this memory reference is unknown. */
2360 if (civ->base_object == NULL_TREE)
2361 goto fail;
2363 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2364 return;
2366 fail:
2367 for_each_index (op_p, idx_record_use, data);
2370 /* Finds and records invariants used in STMT. */
2372 static void
2373 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2375 ssa_op_iter iter;
2376 use_operand_p use_p;
2377 tree op;
2379 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2381 op = USE_FROM_PTR (use_p);
2382 record_invariant (data, op, false);
2386 /* CALL calls an internal function. If operand *OP_P will become an
2387 address when the call is expanded, return the type of the memory
2388 being addressed, otherwise return null. */
2390 static tree
2391 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2393 switch (gimple_call_internal_fn (call))
2395 case IFN_MASK_LOAD:
2396 case IFN_MASK_LOAD_LANES:
2397 case IFN_MASK_LEN_LOAD_LANES:
2398 case IFN_LEN_LOAD:
2399 case IFN_MASK_LEN_LOAD:
2400 if (op_p == gimple_call_arg_ptr (call, 0))
2401 return TREE_TYPE (gimple_call_lhs (call));
2402 return NULL_TREE;
2404 case IFN_MASK_STORE:
2405 case IFN_MASK_STORE_LANES:
2406 case IFN_MASK_LEN_STORE_LANES:
2407 case IFN_LEN_STORE:
2408 case IFN_MASK_LEN_STORE:
2410 if (op_p == gimple_call_arg_ptr (call, 0))
2412 internal_fn ifn = gimple_call_internal_fn (call);
2413 int index = internal_fn_stored_value_index (ifn);
2414 return TREE_TYPE (gimple_call_arg (call, index));
2416 return NULL_TREE;
2419 default:
2420 return NULL_TREE;
2424 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2425 Return true if the operand will become an address when STMT
2426 is expanded and record the associated address use if so. */
2428 static bool
2429 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2430 struct iv *iv)
2432 /* Fail if base object of this memory reference is unknown. */
2433 if (iv->base_object == NULL_TREE)
2434 return false;
2436 tree mem_type = NULL_TREE;
2437 if (gcall *call = dyn_cast <gcall *> (stmt))
2438 if (gimple_call_internal_p (call))
2439 mem_type = get_mem_type_for_internal_fn (call, op_p);
2440 if (mem_type)
2442 iv = alloc_iv (data, iv->base, iv->step);
2443 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2444 return true;
2446 return false;
2449 /* Finds interesting uses of induction variables in the statement STMT. */
2451 static void
2452 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2454 struct iv *iv;
2455 tree op, *lhs, *rhs;
2456 ssa_op_iter iter;
2457 use_operand_p use_p;
2458 enum tree_code code;
2460 find_invariants_stmt (data, stmt);
2462 if (gimple_code (stmt) == GIMPLE_COND)
2464 find_interesting_uses_cond (data, stmt);
2465 return;
2468 if (is_gimple_assign (stmt))
2470 lhs = gimple_assign_lhs_ptr (stmt);
2471 rhs = gimple_assign_rhs1_ptr (stmt);
2473 if (TREE_CODE (*lhs) == SSA_NAME)
2475 /* If the statement defines an induction variable, the uses are not
2476 interesting by themselves. */
2478 iv = get_iv (data, *lhs);
2480 if (iv && !integer_zerop (iv->step))
2481 return;
2484 code = gimple_assign_rhs_code (stmt);
2485 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2486 && (REFERENCE_CLASS_P (*rhs)
2487 || is_gimple_val (*rhs)))
2489 if (REFERENCE_CLASS_P (*rhs))
2490 find_interesting_uses_address (data, stmt, rhs);
2491 else
2492 find_interesting_uses_op (data, *rhs);
2494 if (REFERENCE_CLASS_P (*lhs))
2495 find_interesting_uses_address (data, stmt, lhs);
2496 return;
2498 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2500 find_interesting_uses_cond (data, stmt);
2501 return;
2504 /* TODO -- we should also handle address uses of type
2506 memory = call (whatever);
2510 call (memory). */
2513 if (gimple_code (stmt) == GIMPLE_PHI
2514 && gimple_bb (stmt) == data->current_loop->header)
2516 iv = get_iv (data, PHI_RESULT (stmt));
2518 if (iv && !integer_zerop (iv->step))
2519 return;
2522 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2524 op = USE_FROM_PTR (use_p);
2526 if (TREE_CODE (op) != SSA_NAME)
2527 continue;
2529 iv = get_iv (data, op);
2530 if (!iv)
2531 continue;
2533 if (!find_address_like_use (data, stmt, use_p->use, iv))
2534 find_interesting_uses_op (data, op);
2538 /* Finds interesting uses of induction variables outside of loops
2539 on loop exit edge EXIT. */
2541 static void
2542 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2544 gphi *phi;
2545 gphi_iterator psi;
2546 tree def;
2548 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2550 phi = psi.phi ();
2551 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2552 if (!virtual_operand_p (def))
2553 find_interesting_uses_op (data, def);
2557 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2558 mode for the memory reference represented by USE. */
2560 static GTY (()) vec<rtx, va_gc> *addr_list;
2562 static bool
2563 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2565 rtx reg, addr;
2566 unsigned list_index;
2567 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2568 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2570 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2571 if (list_index >= vec_safe_length (addr_list))
2572 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2574 addr = (*addr_list)[list_index];
2575 if (!addr)
2577 addr_mode = targetm.addr_space.address_mode (as);
2578 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2579 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2580 (*addr_list)[list_index] = addr;
2582 else
2583 addr_mode = GET_MODE (addr);
2585 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2586 return (memory_address_addr_space_p (mem_mode, addr, as));
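/* Illustration with made-up, target-dependent numbers: on a target whose
   [base + offset] mode accepts signed 16-bit displacements, an OFFSET of
   32760 makes the probe address (plus reg 32760) valid and true is
   returned, while an OFFSET of 40000 does not.  */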
2589 /* Comparison function to sort group in ascending order of addr_offset. */
2591 static int
2592 group_compare_offset (const void *a, const void *b)
2594 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2595 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2597 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2600 /* Check if small groups should be split. Return true if no group
2601 contains more than two uses with distinct addr_offsets. Return
2602 false otherwise. We want to split such groups because:
2604 1) Small groups don't provide much benefit and may interfere with
2605 general candidate selection.
2606 2) A problem consisting only of small groups is usually small, and the
2607 general algorithm can handle it well.
2609 TODO -- The above claim may not hold when we want to merge memory
2610 accesses with consecutive addresses.  */
2612 static bool
2613 split_small_address_groups_p (struct ivopts_data *data)
2615 unsigned int i, j, distinct = 1;
2616 struct iv_use *pre;
2617 struct iv_group *group;
2619 for (i = 0; i < data->vgroups.length (); i++)
2621 group = data->vgroups[i];
2622 if (group->vuses.length () == 1)
2623 continue;
2625 gcc_assert (address_p (group->type));
2626 if (group->vuses.length () == 2)
2628 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2629 group->vuses[1]->addr_offset) > 0)
2630 std::swap (group->vuses[0], group->vuses[1]);
2632 else
2633 group->vuses.qsort (group_compare_offset);
2635 if (distinct > 2)
2636 continue;
2638 distinct = 1;
2639 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2641 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2643 pre = group->vuses[j];
2644 distinct++;
2647 if (distinct > 2)
2648 break;
2652 return (distinct <= 2);
2655 /* For each group of address type uses, this function further groups
2656 these uses according to the maximum offset supported by target's
2657 [base + offset] addressing mode. */
2659 static void
2660 split_address_groups (struct ivopts_data *data)
2662 unsigned int i, j;
2663 /* Whether every group should be split unconditionally. */
2664 bool split_p = split_small_address_groups_p (data);
2666 for (i = 0; i < data->vgroups.length (); i++)
2668 struct iv_group *new_group = NULL;
2669 struct iv_group *group = data->vgroups[i];
2670 struct iv_use *use = group->vuses[0];
2672 use->id = 0;
2673 use->group_id = group->id;
2674 if (group->vuses.length () == 1)
2675 continue;
2677 gcc_assert (address_p (use->type));
2679 for (j = 1; j < group->vuses.length ();)
2681 struct iv_use *next = group->vuses[j];
2682 poly_int64 offset = next->addr_offset - use->addr_offset;
2684 /* Split the group if asked to, or if the offset against the first
2685 use can't fit in the offset part of the addressing mode. IV uses
2686 having the same offset are still kept in one group. */
2687 if (maybe_ne (offset, 0)
2688 && (split_p || !addr_offset_valid_p (use, offset)))
2690 if (!new_group)
2691 new_group = record_group (data, group->type);
2692 group->vuses.ordered_remove (j);
2693 new_group->vuses.safe_push (next);
2694 continue;
2697 next->id = j;
2698 next->group_id = group->id;
2699 j++;
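/* A hypothetical example of the splitting above: a group whose uses sit at
   offsets {0, 8, 16, 70000} from the first use stays together for the first
   three uses if the target accepts those displacements, while the use at
   offset 70000 is moved into a newly recorded group.  */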
2704 /* Finds uses of the induction variables that are interesting. */
2706 static void
2707 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2709 basic_block bb;
2710 gimple_stmt_iterator bsi;
2711 unsigned i;
2712 edge e;
2714 for (i = 0; i < data->current_loop->num_nodes; i++)
2716 edge_iterator ei;
2717 bb = body[i];
2719 FOR_EACH_EDGE (e, ei, bb->succs)
2720 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2721 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2722 find_interesting_uses_outside (data, e);
2724 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2725 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2726 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2727 if (!is_gimple_debug (gsi_stmt (bsi)))
2728 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2731 split_address_groups (data);
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2735 fprintf (dump_file, "\n<IV Groups>:\n");
2736 dump_groups (dump_file, data);
2737 fprintf (dump_file, "\n");
2741 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2742 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2743 we are at the top-level of the processed address. */
2745 static tree
2746 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2747 poly_int64 *offset)
2749 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2750 enum tree_code code;
2751 tree type, orig_type = TREE_TYPE (expr);
2752 poly_int64 off0, off1;
2753 HOST_WIDE_INT st;
2754 tree orig_expr = expr;
2756 STRIP_NOPS (expr);
2758 type = TREE_TYPE (expr);
2759 code = TREE_CODE (expr);
2760 *offset = 0;
2762 switch (code)
2764 case POINTER_PLUS_EXPR:
2765 case PLUS_EXPR:
2766 case MINUS_EXPR:
2767 op0 = TREE_OPERAND (expr, 0);
2768 op1 = TREE_OPERAND (expr, 1);
2770 op0 = strip_offset_1 (op0, false, false, &off0);
2771 op1 = strip_offset_1 (op1, false, false, &off1);
2773 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2774 if (op0 == TREE_OPERAND (expr, 0)
2775 && op1 == TREE_OPERAND (expr, 1))
2776 return orig_expr;
2778 if (integer_zerop (op1))
2779 expr = op0;
2780 else if (integer_zerop (op0))
2782 if (code == MINUS_EXPR)
2784 if (TYPE_OVERFLOW_UNDEFINED (type))
2786 type = unsigned_type_for (type);
2787 op1 = fold_convert (type, op1);
2789 expr = fold_build1 (NEGATE_EXPR, type, op1);
2791 else
2792 expr = op1;
2794 else
2796 if (TYPE_OVERFLOW_UNDEFINED (type))
2798 type = unsigned_type_for (type);
2799 if (code == POINTER_PLUS_EXPR)
2800 code = PLUS_EXPR;
2801 op0 = fold_convert (type, op0);
2802 op1 = fold_convert (type, op1);
2804 expr = fold_build2 (code, type, op0, op1);
2807 return fold_convert (orig_type, expr);
2809 case MULT_EXPR:
2810 op1 = TREE_OPERAND (expr, 1);
2811 if (!cst_and_fits_in_hwi (op1))
2812 return orig_expr;
2814 op0 = TREE_OPERAND (expr, 0);
2815 op0 = strip_offset_1 (op0, false, false, &off0);
2816 if (op0 == TREE_OPERAND (expr, 0))
2817 return orig_expr;
2819 *offset = off0 * int_cst_value (op1);
2820 if (integer_zerop (op0))
2821 expr = op0;
2822 else
2824 if (TYPE_OVERFLOW_UNDEFINED (type))
2826 type = unsigned_type_for (type);
2827 op0 = fold_convert (type, op0);
2828 op1 = fold_convert (type, op1);
2830 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2833 return fold_convert (orig_type, expr);
2835 case ARRAY_REF:
2836 case ARRAY_RANGE_REF:
2837 if (!inside_addr)
2838 return orig_expr;
2840 step = array_ref_element_size (expr);
2841 if (!cst_and_fits_in_hwi (step))
2842 break;
2844 st = int_cst_value (step);
2845 op1 = TREE_OPERAND (expr, 1);
2846 op1 = strip_offset_1 (op1, false, false, &off1);
2847 *offset = off1 * st;
2849 if (top_compref
2850 && integer_zerop (op1))
2852 /* Strip the component reference completely. */
2853 op0 = TREE_OPERAND (expr, 0);
2854 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2855 *offset += off0;
2856 return op0;
2858 break;
2860 case COMPONENT_REF:
2862 tree field;
2864 if (!inside_addr)
2865 return orig_expr;
2867 tmp = component_ref_field_offset (expr);
2868 field = TREE_OPERAND (expr, 1);
2869 if (top_compref
2870 && cst_and_fits_in_hwi (tmp)
2871 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2873 HOST_WIDE_INT boffset, abs_off;
2875 /* Strip the component reference completely. */
2876 op0 = TREE_OPERAND (expr, 0);
2877 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2878 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2879 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2880 if (boffset < 0)
2881 abs_off = -abs_off;
2883 *offset = off0 + int_cst_value (tmp) + abs_off;
2884 return op0;
2887 break;
2889 case ADDR_EXPR:
2890 op0 = TREE_OPERAND (expr, 0);
2891 op0 = strip_offset_1 (op0, true, true, &off0);
2892 *offset += off0;
2894 if (op0 == TREE_OPERAND (expr, 0))
2895 return orig_expr;
2897 expr = build_fold_addr_expr (op0);
2898 return fold_convert (orig_type, expr);
2900 case MEM_REF:
2901 /* ??? Offset operand? */
2902 inside_addr = false;
2903 break;
2905 default:
2906 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2907 return build_int_cst (orig_type, 0);
2908 return orig_expr;
2911 /* Default handling of expressions for which we want to recurse into
2912 the first operand. */
2913 op0 = TREE_OPERAND (expr, 0);
2914 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2915 *offset += off0;
2917 if (op0 == TREE_OPERAND (expr, 0)
2918 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2919 return orig_expr;
2921 expr = copy_node (expr);
2922 TREE_OPERAND (expr, 0) = op0;
2923 if (op1)
2924 TREE_OPERAND (expr, 1) = op1;
2926 /* Inside address, we might strip the top level component references,
2927 thus changing type of the expression. Handling of ADDR_EXPR
2928 will fix that. */
2929 expr = fold_convert (orig_type, expr);
2931 return expr;
2934 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2936 static tree
2937 strip_offset (tree expr, poly_uint64 *offset)
2939 poly_int64 off;
2940 tree core = strip_offset_1 (expr, false, false, &off);
2941 *offset = off;
2942 return core;
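/* Hypothetical inputs for illustration: for EXPR = &a[i + 3] with 4-byte
   elements, the core &a[i] is returned and *OFFSET is set to 12; for
   EXPR = p + 16 with a pointer p, p is returned and *OFFSET is set to 16.  */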
2945 /* Returns variant of TYPE that can be used as base for different uses.
2946 We return unsigned type with the same precision, which avoids problems
2947 with overflows. */
2949 static tree
2950 generic_type_for (tree type)
2952 if (POINTER_TYPE_P (type))
2953 return unsigned_type_for (type);
2955 if (TYPE_UNSIGNED (type))
2956 return type;
2958 return unsigned_type_for (type);
2961 /* Private data for walk_tree. */
2963 struct walk_tree_data
2965 bitmap *inv_vars;
2966 struct ivopts_data *idata;
2969 /* Callback function for walk_tree; it records invariants and symbol
2970 references in *EXPR_P. DATA is the structure storing result info. */
2972 static tree
2973 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2975 tree op = *expr_p;
2976 struct version_info *info;
2977 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2979 if (TREE_CODE (op) != SSA_NAME)
2980 return NULL_TREE;
2982 info = name_info (wdata->idata, op);
2983 /* Because we expand simple operations when finding IVs, a loop invariant
2984 variable that isn't referred to by the original loop could be used now.
2985 Record such invariant variables here. */
2986 if (!info->iv)
2988 struct ivopts_data *idata = wdata->idata;
2989 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2991 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2993 tree steptype = TREE_TYPE (op);
2994 if (POINTER_TYPE_P (steptype))
2995 steptype = sizetype;
2996 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
2997 record_invariant (idata, op, false);
3000 if (!info->inv_id || info->has_nonlin_use)
3001 return NULL_TREE;
3003 if (!*wdata->inv_vars)
3004 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3005 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3007 return NULL_TREE;
3010 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3011 store them. */
3013 static inline void
3014 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3016 struct walk_tree_data wdata;
3018 if (!inv_vars)
3019 return;
3021 wdata.idata = data;
3022 wdata.inv_vars = inv_vars;
3023 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3026 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
3027 entry is recorded if it doesn't exist yet. Given the two exprs below:
3028 inv_expr + cst1, inv_expr + cst2
3029 it's hard to decide whether the constant part should be stripped
3030 or not. We choose not to strip it, based on the following facts:
3031 1) We need to count the ADD cost for the constant part if it's stripped,
3032 which isn't always trivial where this function is called.
3033 2) Stripping the constant away may conflict with the subsequent loop
3034 invariant hoisting pass.
3035 3) Not stripping the constant away results in more invariant exprs,
3036 which usually leads to decisions preferring lower register pressure. */
3038 static iv_inv_expr_ent *
3039 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3041 STRIP_NOPS (inv_expr);
3043 if (poly_int_tree_p (inv_expr)
3044 || TREE_CODE (inv_expr) == SSA_NAME)
3045 return NULL;
3047 /* Don't strip constant part away as we used to. */
3049 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3050 struct iv_inv_expr_ent ent;
3051 ent.expr = inv_expr;
3052 ent.hash = iterative_hash_expr (inv_expr, 0);
3053 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3055 if (!*slot)
3057 *slot = XNEW (struct iv_inv_expr_ent);
3058 (*slot)->expr = inv_expr;
3059 (*slot)->hash = ent.hash;
3060 (*slot)->id = ++data->max_inv_expr_id;
3063 return *slot;
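/* For illustration with made-up expressions: because the constant part is
   kept, inv + 4 and inv + 8 hash to two distinct iv_inv_expr_ent entries
   instead of sharing a single entry for inv.  */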
3067 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3068 unsuitable as ivopts candidates for potentially involving undefined
3069 behavior. */
3071 static tree
3072 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3074 basic_block bb = (basic_block) bb_;
3075 if (TREE_CODE (*tp) == SSA_NAME
3076 && ssa_name_maybe_undef_p (*tp)
3077 && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3078 return *tp;
3079 if (!EXPR_P (*tp))
3080 *walk_subtrees = 0;
3081 return NULL;
3084 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3085 position to POS. If USE is not NULL, the candidate is set as related to
3086 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3087 replacement of the final value of the iv by a direct computation. */
3089 static struct iv_cand *
3090 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3091 enum iv_position pos, struct iv_use *use,
3092 gimple *incremented_at, struct iv *orig_iv = NULL,
3093 bool doloop = false)
3095 unsigned i;
3096 struct iv_cand *cand = NULL;
3097 tree type, orig_type;
3099 gcc_assert (base && step);
3101 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3102 live, but the ivopts code may replace a real pointer with one
3103 pointing before or after the memory block that is then adjusted
3104 into the memory block during the loop. FIXME: It would likely be
3105 better to actually force the pointer live and still use ivopts;
3106 for example, it would be enough to write the pointer into memory
3107 and keep it there until after the loop. */
3108 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3109 return NULL;
3111 /* If BASE contains undefined SSA names make sure we only record
3112 the original IV. */
3113 bool involves_undefs = false;
3114 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3116 if (pos != IP_ORIGINAL)
3117 return NULL;
3118 important = false;
3119 involves_undefs = true;
3122 /* For non-original variables, make sure their values are computed in a type
3123 that does not invoke undefined behavior on overflows (since in general,
3124 we cannot prove that these induction variables are non-wrapping). */
3125 if (pos != IP_ORIGINAL)
3127 orig_type = TREE_TYPE (base);
3128 type = generic_type_for (orig_type);
3129 if (type != orig_type)
3131 base = fold_convert (type, base);
3132 step = fold_convert (type, step);
3136 for (i = 0; i < data->vcands.length (); i++)
3138 cand = data->vcands[i];
3140 if (cand->pos != pos)
3141 continue;
3143 if (cand->incremented_at != incremented_at
3144 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3145 && cand->ainc_use != use))
3146 continue;
3148 if (operand_equal_p (base, cand->iv->base, 0)
3149 && operand_equal_p (step, cand->iv->step, 0)
3150 && (TYPE_PRECISION (TREE_TYPE (base))
3151 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3152 break;
3155 if (i == data->vcands.length ())
3157 cand = XCNEW (struct iv_cand);
3158 cand->id = i;
3159 cand->iv = alloc_iv (data, base, step);
3160 cand->pos = pos;
3161 if (pos != IP_ORIGINAL)
3163 if (doloop)
3164 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3165 else
3166 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3167 cand->var_after = cand->var_before;
3169 cand->important = important;
3170 cand->involves_undefs = involves_undefs;
3171 cand->incremented_at = incremented_at;
3172 cand->doloop_p = doloop;
3173 data->vcands.safe_push (cand);
3175 if (!poly_int_tree_p (step))
3177 find_inv_vars (data, &step, &cand->inv_vars);
3179 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3180 /* Share bitmap between inv_vars and inv_exprs for cand. */
3181 if (inv_expr != NULL)
3183 cand->inv_exprs = cand->inv_vars;
3184 cand->inv_vars = NULL;
3185 if (cand->inv_exprs)
3186 bitmap_clear (cand->inv_exprs);
3187 else
3188 cand->inv_exprs = BITMAP_ALLOC (NULL);
3190 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3194 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3195 cand->ainc_use = use;
3196 else
3197 cand->ainc_use = NULL;
3199 cand->orig_iv = orig_iv;
3200 if (dump_file && (dump_flags & TDF_DETAILS))
3201 dump_cand (dump_file, cand);
3204 cand->important |= important;
3205 cand->doloop_p |= doloop;
3207 /* Relate candidate to the group for which it is added. */
3208 if (use)
3209 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3211 return cand;
3214 /* Returns true if incrementing the induction variable at the end of the LOOP
3215 is allowed.
3217 The purpose is to avoid splitting the latch edge with a biv increment, thus
3218 creating a jump, possibly confusing other optimization passes and leaving
3219 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3220 available (so we do not have a better alternative), or if the latch edge
3221 is already nonempty. */
3223 static bool
3224 allow_ip_end_pos_p (class loop *loop)
3226 if (!ip_normal_pos (loop))
3227 return true;
3229 if (!empty_block_p (ip_end_pos (loop)))
3230 return true;
3232 return false;
3235 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3236 Important field is set to IMPORTANT. */
3238 static void
3239 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3240 bool important, struct iv_use *use)
3242 basic_block use_bb = gimple_bb (use->stmt);
3243 machine_mode mem_mode;
3244 unsigned HOST_WIDE_INT cstepi;
3246 /* If we insert the increment in any position other than the standard
3247 ones, we must ensure that it is incremented once per iteration.
3248 It must not be in an inner nested loop, or one side of an if
3249 statement. */
3250 if (use_bb->loop_father != data->current_loop
3251 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3252 || stmt_can_throw_internal (cfun, use->stmt)
3253 || !cst_and_fits_in_hwi (step))
3254 return;
3256 cstepi = int_cst_value (step);
3258 mem_mode = TYPE_MODE (use->mem_type);
3259 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3260 || USE_STORE_PRE_INCREMENT (mem_mode))
3261 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3262 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3263 || USE_STORE_PRE_DECREMENT (mem_mode))
3264 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3266 enum tree_code code = MINUS_EXPR;
3267 tree new_base;
3268 tree new_step = step;
3270 if (POINTER_TYPE_P (TREE_TYPE (base)))
3272 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3273 code = POINTER_PLUS_EXPR;
3275 else
3276 new_step = fold_convert (TREE_TYPE (base), new_step);
3277 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3278 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3279 use->stmt);
3281 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3282 || USE_STORE_POST_INCREMENT (mem_mode))
3283 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3284 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3285 || USE_STORE_POST_DECREMENT (mem_mode))
3286 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3288 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3289 use->stmt);
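/* A hypothetical, target-dependent illustration: for a 4-byte access *p in
   the loop body where p steps by 4, a target with post-increment addressing
   gets an IP_AFTER_USE candidate here, allowing the access to be expanded
   as a load or store with post-increment of p.  */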
3293 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3294 position to POS. If USE is not NULL, the candidate is set as related to
3295 it. The candidate computation is scheduled before exit condition and at
3296 the end of loop. */
3298 static void
3299 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3300 struct iv_use *use, struct iv *orig_iv = NULL,
3301 bool doloop = false)
3303 if (ip_normal_pos (data->current_loop))
3304 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3305 doloop);
3306 /* Exclude the doloop candidate here since it requires a decrement followed by
3307 a comparison and jump; the IP_END position doesn't match. */
3308 if (!doloop && ip_end_pos (data->current_loop)
3309 && allow_ip_end_pos_p (data->current_loop))
3310 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3313 /* Adds standard iv candidates. */
3315 static void
3316 add_standard_iv_candidates (struct ivopts_data *data)
3318 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3320 /* The same for a double-integer type if it is still fast enough. */
3321 if (TYPE_PRECISION
3322 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3323 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3324 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3325 build_int_cst (long_integer_type_node, 1), true, NULL);
3327 /* The same for a double-integer type if it is still fast enough. */
3328 if (TYPE_PRECISION
3329 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3330 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3331 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3332 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3336 /* Adds candidates based on the old induction variable IV. */
3338 static void
3339 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3341 gimple *phi;
3342 tree def;
3343 struct iv_cand *cand;
3345 /* Check if this biv is used in address type use. */
3346 if (iv->no_overflow && iv->have_address_use
3347 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3348 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3350 tree base = fold_convert (sizetype, iv->base);
3351 tree step = fold_convert (sizetype, iv->step);
3353 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3354 add_candidate (data, base, step, true, NULL, iv);
3355 /* Add iv cand of the original type only if it has nonlinear use. */
3356 if (iv->nonlin_use)
3357 add_candidate (data, iv->base, iv->step, true, NULL);
3359 else
3360 add_candidate (data, iv->base, iv->step, true, NULL);
3362 /* The same, but with initial value zero. */
3363 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3364 add_candidate (data, size_int (0), iv->step, true, NULL);
3365 else
3366 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3367 iv->step, true, NULL);
3369 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3370 if (gimple_code (phi) == GIMPLE_PHI)
3372 /* Additionally record the possibility of leaving the original iv
3373 untouched. */
3374 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3375 /* Don't add candidate if it's from another PHI node because
3376 it's an affine iv appearing in the form of PEELED_CHREC. */
3377 phi = SSA_NAME_DEF_STMT (def);
3378 if (gimple_code (phi) != GIMPLE_PHI)
3380 cand = add_candidate_1 (data,
3381 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3382 SSA_NAME_DEF_STMT (def));
3383 if (cand)
3385 cand->var_before = iv->ssa_name;
3386 cand->var_after = def;
3389 else
3390 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3394 /* Adds candidates based on the old induction variables. */
3396 static void
3397 add_iv_candidate_for_bivs (struct ivopts_data *data)
3399 unsigned i;
3400 struct iv *iv;
3401 bitmap_iterator bi;
3403 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3405 iv = ver_info (data, i)->iv;
3406 if (iv && iv->biv_p && !integer_zerop (iv->step))
3407 add_iv_candidate_for_biv (data, iv);
3411 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3413 static void
3414 record_common_cand (struct ivopts_data *data, tree base,
3415 tree step, struct iv_use *use)
3417 class iv_common_cand ent;
3418 class iv_common_cand **slot;
3420 ent.base = base;
3421 ent.step = step;
3422 ent.hash = iterative_hash_expr (base, 0);
3423 ent.hash = iterative_hash_expr (step, ent.hash);
3425 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3426 if (*slot == NULL)
3428 *slot = new iv_common_cand ();
3429 (*slot)->base = base;
3430 (*slot)->step = step;
3431 (*slot)->uses.create (8);
3432 (*slot)->hash = ent.hash;
3433 data->iv_common_cands.safe_push ((*slot));
3436 gcc_assert (use != NULL);
3437 (*slot)->uses.safe_push (use);
3438 return;
3441 /* Comparison function used to sort common candidates. */
3443 static int
3444 common_cand_cmp (const void *p1, const void *p2)
3446 unsigned n1, n2;
3447 const class iv_common_cand *const *const ccand1
3448 = (const class iv_common_cand *const *)p1;
3449 const class iv_common_cand *const *const ccand2
3450 = (const class iv_common_cand *const *)p2;
3452 n1 = (*ccand1)->uses.length ();
3453 n2 = (*ccand2)->uses.length ();
3454 return n2 - n1;
3457 /* Adds IV candidates based on the common candidates recorded. */
3459 static void
3460 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3462 unsigned i, j;
3463 struct iv_cand *cand_1, *cand_2;
3465 data->iv_common_cands.qsort (common_cand_cmp);
3466 for (i = 0; i < data->iv_common_cands.length (); i++)
3468 class iv_common_cand *ptr = data->iv_common_cands[i];
3470 /* Only add IV candidate if it's derived from multiple uses. */
3471 if (ptr->uses.length () <= 1)
3472 break;
3474 cand_1 = NULL;
3475 cand_2 = NULL;
3476 if (ip_normal_pos (data->current_loop))
3477 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3478 false, IP_NORMAL, NULL, NULL);
3480 if (ip_end_pos (data->current_loop)
3481 && allow_ip_end_pos_p (data->current_loop))
3482 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3483 false, IP_END, NULL, NULL);
3485 /* Bind deriving uses and the new candidates. */
3486 for (j = 0; j < ptr->uses.length (); j++)
3488 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3489 if (cand_1)
3490 bitmap_set_bit (group->related_cands, cand_1->id);
3491 if (cand_2)
3492 bitmap_set_bit (group->related_cands, cand_2->id);
3496 /* Release data since it is useless from this point. */
3497 data->iv_common_cand_tab->empty ();
3498 data->iv_common_cands.truncate (0);
3501 /* Adds candidates based on the value of USE's iv. */
3503 static void
3504 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3506 poly_uint64 offset;
3507 tree base;
3508 struct iv *iv = use->iv;
3509 tree basetype = TREE_TYPE (iv->base);
3511 /* Don't add a candidate for an iv_use with a non-integer, non-pointer, or
3512 non-mode-precision type; instead, add a candidate for the corresponding
3513 scev in an unsigned type with the same precision. See PR93674 for more info. */
3514 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3515 || !type_has_mode_precision_p (basetype))
3517 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3518 TYPE_UNSIGNED (basetype));
3519 add_candidate (data, fold_convert (basetype, iv->base),
3520 fold_convert (basetype, iv->step), false, NULL);
3521 return;
3524 add_candidate (data, iv->base, iv->step, false, use);
3526 /* Record common candidate for use in case it can be shared by others. */
3527 record_common_cand (data, iv->base, iv->step, use);
3529 /* Record common candidate with initial value zero. */
3530 basetype = TREE_TYPE (iv->base);
3531 if (POINTER_TYPE_P (basetype))
3532 basetype = sizetype;
3533 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3535 /* Compare the cost of an address with an unscaled index with the cost of
3536 an address with a scaled index and add candidate if useful. */
3537 poly_int64 step;
3538 if (use != NULL
3539 && poly_int_tree_p (iv->step, &step)
3540 && address_p (use->type))
3542 poly_int64 new_step;
3543 unsigned int fact = preferred_mem_scale_factor
3544 (use->iv->base,
3545 TYPE_MODE (use->mem_type),
3546 optimize_loop_for_speed_p (data->current_loop));
3548 if (fact != 1
3549 && multiple_p (step, fact, &new_step))
3550 add_candidate (data, size_int (0),
3551 wide_int_to_tree (sizetype, new_step),
3552 true, NULL);
3555 /* Record common candidate with constant offset stripped in base.
3556 Like the use itself, we also add candidate directly for it. */
3557 base = strip_offset (iv->base, &offset);
3558 if (maybe_ne (offset, 0U) || base != iv->base)
3560 record_common_cand (data, base, iv->step, use);
3561 add_candidate (data, base, iv->step, false, use);
3564 /* Record common candidate with base_object removed in base. */
3565 base = iv->base;
3566 STRIP_NOPS (base);
3567 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3569 tree step = iv->step;
3571 STRIP_NOPS (step);
3572 base = TREE_OPERAND (base, 1);
3573 step = fold_convert (sizetype, step);
3574 record_common_cand (data, base, step, use);
3575 /* Also record common candidate with offset stripped. */
3576 tree alt_base, alt_offset;
3577 split_constant_offset (base, &alt_base, &alt_offset);
3578 if (!integer_zerop (alt_offset))
3579 record_common_cand (data, alt_base, step, use);
3582 /* Finally, add auto-increment candidates. Make such variables
3583 important since other iv uses with the same base object may be based
3584 on them. */
3585 if (use != NULL && address_p (use->type))
3586 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3589 /* Adds candidates based on the uses. */
3591 static void
3592 add_iv_candidate_for_groups (struct ivopts_data *data)
3594 unsigned i;
3596 /* Only add candidate for the first use in group. */
3597 for (i = 0; i < data->vgroups.length (); i++)
3599 struct iv_group *group = data->vgroups[i];
3601 gcc_assert (group->vuses[0] != NULL);
3602 add_iv_candidate_for_use (data, group->vuses[0]);
3604 add_iv_candidate_derived_from_uses (data);
3607 /* Record important candidates and add them to related_cands bitmaps. */
3609 static void
3610 record_important_candidates (struct ivopts_data *data)
3612 unsigned i;
3613 struct iv_group *group;
3615 for (i = 0; i < data->vcands.length (); i++)
3617 struct iv_cand *cand = data->vcands[i];
3619 if (cand->important)
3620 bitmap_set_bit (data->important_candidates, i);
3623 data->consider_all_candidates = (data->vcands.length ()
3624 <= CONSIDER_ALL_CANDIDATES_BOUND);
3626 /* Add important candidates to groups' related_cands bitmaps. */
3627 for (i = 0; i < data->vgroups.length (); i++)
3629 group = data->vgroups[i];
3630 bitmap_ior_into (group->related_cands, data->important_candidates);
3634 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3635 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3636 we allocate a simple list to every use. */
3638 static void
3639 alloc_use_cost_map (struct ivopts_data *data)
3641 unsigned i, size, s;
3643 for (i = 0; i < data->vgroups.length (); i++)
3645 struct iv_group *group = data->vgroups[i];
3647 if (data->consider_all_candidates)
3648 size = data->vcands.length ();
3649 else
3651 s = bitmap_count_bits (group->related_cands);
3653 /* Round up to a power of two, so that computing the modulo is fast. */
3654 size = s ? (1 << ceil_log2 (s)) : 1;
3657 group->n_map_members = size;
3658 group->cost_map = XCNEWVEC (class cost_pair, size);
3662 /* Sets cost of (GROUP, CAND) pair to COST and records that it depends
3663 on invariants INV_VARS and that the value used in expressing it is
3664 VALUE, and in case of iv elimination the comparison operator is COMP. */
3666 static void
3667 set_group_iv_cost (struct ivopts_data *data,
3668 struct iv_group *group, struct iv_cand *cand,
3669 comp_cost cost, bitmap inv_vars, tree value,
3670 enum tree_code comp, bitmap inv_exprs)
3672 unsigned i, s;
3674 if (cost.infinite_cost_p ())
3676 BITMAP_FREE (inv_vars);
3677 BITMAP_FREE (inv_exprs);
3678 return;
3681 if (data->consider_all_candidates)
3683 group->cost_map[cand->id].cand = cand;
3684 group->cost_map[cand->id].cost = cost;
3685 group->cost_map[cand->id].inv_vars = inv_vars;
3686 group->cost_map[cand->id].inv_exprs = inv_exprs;
3687 group->cost_map[cand->id].value = value;
3688 group->cost_map[cand->id].comp = comp;
3689 return;
3692 /* n_map_members is a power of two, so this computes modulo. */
3693 s = cand->id & (group->n_map_members - 1);
3694 for (i = s; i < group->n_map_members; i++)
3695 if (!group->cost_map[i].cand)
3696 goto found;
3697 for (i = 0; i < s; i++)
3698 if (!group->cost_map[i].cand)
3699 goto found;
3701 gcc_unreachable ();
3703 found:
3704 group->cost_map[i].cand = cand;
3705 group->cost_map[i].cost = cost;
3706 group->cost_map[i].inv_vars = inv_vars;
3707 group->cost_map[i].inv_exprs = inv_exprs;
3708 group->cost_map[i].value = value;
3709 group->cost_map[i].comp = comp;
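/* Illustration of the probing scheme: with n_map_members == 8, a candidate
   with id 11 starts at slot 11 & 7 == 3 and scans forward (wrapping to
   slot 0) until a free slot is found; get_group_iv_cost below probes in the
   same order, so a lookup stops either at the candidate or at the first
   empty slot.  */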
3712 /* Gets cost of (GROUP, CAND) pair. */
3714 static class cost_pair *
3715 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3716 struct iv_cand *cand)
3718 unsigned i, s;
3719 class cost_pair *ret;
3721 if (!cand)
3722 return NULL;
3724 if (data->consider_all_candidates)
3726 ret = group->cost_map + cand->id;
3727 if (!ret->cand)
3728 return NULL;
3730 return ret;
3733 /* n_map_members is a power of two, so this computes modulo. */
3734 s = cand->id & (group->n_map_members - 1);
3735 for (i = s; i < group->n_map_members; i++)
3736 if (group->cost_map[i].cand == cand)
3737 return group->cost_map + i;
3738 else if (group->cost_map[i].cand == NULL)
3739 return NULL;
3740 for (i = 0; i < s; i++)
3741 if (group->cost_map[i].cand == cand)
3742 return group->cost_map + i;
3743 else if (group->cost_map[i].cand == NULL)
3744 return NULL;
3746 return NULL;
3749 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3750 static rtx
3751 produce_memory_decl_rtl (tree obj, int *regno)
3753 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3754 machine_mode address_mode = targetm.addr_space.address_mode (as);
3755 rtx x;
3757 gcc_assert (obj);
3758 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3760 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3761 x = gen_rtx_SYMBOL_REF (address_mode, name);
3762 SET_SYMBOL_REF_DECL (x, obj);
3763 x = gen_rtx_MEM (DECL_MODE (obj), x);
3764 set_mem_addr_space (x, as);
3765 targetm.encode_section_info (obj, x, true);
3767 else
3769 x = gen_raw_REG (address_mode, (*regno)++);
3770 x = gen_rtx_MEM (DECL_MODE (obj), x);
3771 set_mem_addr_space (x, as);
3774 return x;
3777 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3778 walk_tree. DATA contains the actual fake register number. */
3780 static tree
3781 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3783 tree obj = NULL_TREE;
3784 rtx x = NULL_RTX;
3785 int *regno = (int *) data;
3787 switch (TREE_CODE (*expr_p))
3789 case ADDR_EXPR:
3790 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3791 handled_component_p (*expr_p);
3792 expr_p = &TREE_OPERAND (*expr_p, 0))
3793 continue;
3794 obj = *expr_p;
3795 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3796 x = produce_memory_decl_rtl (obj, regno);
3797 break;
3799 case SSA_NAME:
3800 *ws = 0;
3801 obj = SSA_NAME_VAR (*expr_p);
3802 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3803 if (!obj)
3804 return NULL_TREE;
3805 if (!DECL_RTL_SET_P (obj))
3806 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3807 break;
3809 case VAR_DECL:
3810 case PARM_DECL:
3811 case RESULT_DECL:
3812 *ws = 0;
3813 obj = *expr_p;
3815 if (DECL_RTL_SET_P (obj))
3816 break;
3818 if (DECL_MODE (obj) == BLKmode)
3819 x = produce_memory_decl_rtl (obj, regno);
3820 else
3821 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3823 break;
3825 default:
3826 break;
3829 if (x)
3831 decl_rtl_to_reset.safe_push (obj);
3832 SET_DECL_RTL (obj, x);
3835 return NULL_TREE;
3838 /* Predict whether the given loop will be transformed in the RTL
3839 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3840 This covers only the target-independent checks; see targetm.predict_doloop_p
3841 for the target-dependent ones.
3843 Note that according to some initial investigation, checks like the costly
3844 niter check and invalid stmt scanning don't gain much in general
3845 cases, so keep this as simple as possible for now.
3847 Some RTL-specific checks seem impossible to perform on gimple; if any new
3848 checks or easy checks _are_ missing here, please add them. */
3850 static bool
3851 generic_predict_doloop_p (struct ivopts_data *data)
3853 class loop *loop = data->current_loop;
3855 /* Call target hook for target dependent checks. */
3856 if (!targetm.predict_doloop_p (loop))
3858 if (dump_file && (dump_flags & TDF_DETAILS))
3859 fprintf (dump_file, "Predict doloop failure due to"
3860 " target specific checks.\n");
3861 return false;
3864 /* Similar to doloop_optimize, check the iteration description to see whether
3865 it's suitable or not. Keep it as simple as possible; feel free to extend it
3866 if you find that any multiple-exit cases matter. */
3867 edge exit = single_dom_exit (loop);
3868 class tree_niter_desc *niter_desc;
3869 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3871 if (dump_file && (dump_flags & TDF_DETAILS))
3872 fprintf (dump_file, "Predict doloop failure due to"
3873 " unexpected niters.\n");
3874 return false;
3877 /* Similar to doloop_optimize, check whether the iteration count is too
3878 small to be profitable. */
3879 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3880 if (est_niter == -1)
3881 est_niter = get_likely_max_loop_iterations_int (loop);
3882 if (est_niter >= 0 && est_niter < 3)
3884 if (dump_file && (dump_flags & TDF_DETAILS))
3885 fprintf (dump_file,
3886 "Predict doloop failure due to"
3887 " too few iterations (%u).\n",
3888 (unsigned int) est_niter);
3889 return false;
3892 return true;
3895 /* Determines cost of the computation of EXPR. */
3897 static unsigned
3898 computation_cost (tree expr, bool speed)
3900 rtx_insn *seq;
3901 rtx rslt;
3902 tree type = TREE_TYPE (expr);
3903 unsigned cost;
3904 /* Avoid using hard regs in ways which may be unsupported. */
3905 int regno = LAST_VIRTUAL_REGISTER + 1;
3906 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3907 enum node_frequency real_frequency = node->frequency;
3909 node->frequency = NODE_FREQUENCY_NORMAL;
3910 crtl->maybe_hot_insn_p = speed;
3911 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3912 start_sequence ();
3913 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3914 seq = get_insns ();
3915 end_sequence ();
3916 default_rtl_profile ();
3917 node->frequency = real_frequency;
3919 cost = seq_cost (seq, speed);
3920 if (MEM_P (rslt))
3921 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3922 TYPE_ADDR_SPACE (type), speed);
3923 else if (!REG_P (rslt))
3924 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3926 return cost;
3929 /* Returns variable containing the value of candidate CAND at statement AT. */
3931 static tree
3932 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3934 if (stmt_after_increment (loop, cand, stmt))
3935 return cand->var_after;
3936 else
3937 return cand->var_before;
3940 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3941 same precision that is at least as wide as the precision of TYPE, stores
3942 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3943 type of A and B. */
3945 static tree
3946 determine_common_wider_type (tree *a, tree *b)
3948 tree wider_type = NULL;
3949 tree suba, subb;
3950 tree atype = TREE_TYPE (*a);
3952 if (CONVERT_EXPR_P (*a))
3954 suba = TREE_OPERAND (*a, 0);
3955 wider_type = TREE_TYPE (suba);
3956 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3957 return atype;
3959 else
3960 return atype;
3962 if (CONVERT_EXPR_P (*b))
3964 subb = TREE_OPERAND (*b, 0);
3965 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3966 return atype;
3968 else
3969 return atype;
3971 *a = suba;
3972 *b = subb;
3973 return wider_type;
3976 /* Determines the expression by which USE is expressed from induction variable
3977 CAND at statement AT in LOOP. The expression is stored in two parts in a
3978 decomposed form. The invariant part is stored in AFF_INV, while the variant
3979 part is stored in AFF_VAR. Store the ratio of USE.step over CAND.step in
3980 PRAT if it's non-null. Returns false if USE cannot be expressed using CAND. */
3982 static bool
3983 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3984 struct iv_cand *cand, class aff_tree *aff_inv,
3985 class aff_tree *aff_var, widest_int *prat = NULL)
3987 tree ubase = use->iv->base, ustep = use->iv->step;
3988 tree cbase = cand->iv->base, cstep = cand->iv->step;
3989 tree common_type, uutype, var, cstep_common;
3990 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3991 aff_tree aff_cbase;
3992 widest_int rat;
3994 /* We must have a precision to express the values of use. */
3995 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3996 return false;
3998 var = var_at_stmt (loop, cand, at);
3999 uutype = unsigned_type_for (utype);
4001 /* If the conversion is not noop, perform it. */
4002 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4004 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4005 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4007 tree inner_base, inner_step, inner_type;
4008 inner_base = TREE_OPERAND (cbase, 0);
4009 if (CONVERT_EXPR_P (cstep))
4010 inner_step = TREE_OPERAND (cstep, 0);
4011 else
4012 inner_step = cstep;
4014 inner_type = TREE_TYPE (inner_base);
4015 /* If candidate is added from a biv whose type is smaller than
4016 ctype, we know both candidate and the biv won't overflow.
4017 In this case, it's safe to skip the conversion in the candidate.
4018 As an example, (unsigned short)((unsigned long)A) is equal to
4019 (unsigned short)A, if A has a type no larger than short. */
4020 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4022 cbase = inner_base;
4023 cstep = inner_step;
4026 cbase = fold_convert (uutype, cbase);
4027 cstep = fold_convert (uutype, cstep);
4028 var = fold_convert (uutype, var);
4031 /* Ratio is 1 when computing the value of biv cand by itself.
4032 We can't rely on constant_multiple_of in this case because the
4033 use is created after the original biv is selected. The call
4034 could fail because of inconsistent fold behavior. See PR68021
4035 for more information. */
4036 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4038 gcc_assert (is_gimple_assign (use->stmt));
4039 gcc_assert (use->iv->ssa_name == cand->var_after);
4040 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4041 rat = 1;
4043 else if (!constant_multiple_of (ustep, cstep, &rat))
4044 return false;
4046 if (prat)
4047 *prat = rat;
4049 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4050 type, we achieve better folding by computing their difference in this
4051 wider type, and casting the result to UUTYPE.  We do not need to worry about
4052 overflows, as all the arithmetic will in the end be performed in UUTYPE
4053 anyway. */
4054 common_type = determine_common_wider_type (&ubase, &cbase);
4056 /* use = ubase - ratio * cbase + ratio * var. */
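  /* E.g. (illustrative values): for a USE with base 0 and step 4 expressed
     from a CAND with base 0 and step 1, the ratio is 4 and the value of the
     use is simply 4 * var.  */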
4057 tree_to_aff_combination (ubase, common_type, aff_inv);
4058 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4059 tree_to_aff_combination (var, uutype, aff_var);
4061 /* We need to shift the value if we are after the increment. */
4062 if (stmt_after_increment (loop, cand, at))
4064 aff_tree cstep_aff;
4066 if (common_type != uutype)
4067 cstep_common = fold_convert (common_type, cstep);
4068 else
4069 cstep_common = cstep;
4071 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4072 aff_combination_add (&aff_cbase, &cstep_aff);
4075 aff_combination_scale (&aff_cbase, -rat);
4076 aff_combination_add (aff_inv, &aff_cbase);
4077 if (common_type != uutype)
4078 aff_combination_convert (aff_inv, uutype);
4080 aff_combination_scale (aff_var, rat);
4081 return true;
4084 /* Determines the expression by which USE is expressed from induction variable
4085 CAND at statement AT in LOOP. The expression is stored in a decomposed
4086 form into AFF. Returns false if USE cannot be expressed using CAND. */
4088 static bool
4089 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4090 struct iv_cand *cand, class aff_tree *aff)
4092 aff_tree aff_var;
4094 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4095 return false;
4097 aff_combination_add (aff, &aff_var);
4098 return true;
4101 /* Return the type of USE. */
4103 static tree
4104 get_use_type (struct iv_use *use)
4106 tree base_type = TREE_TYPE (use->iv->base);
4107 tree type;
4109 if (use->type == USE_REF_ADDRESS)
4111 /* The base_type may be a void pointer. Create a pointer type based on
4112 the mem_ref instead. */
4113 type = build_pointer_type (TREE_TYPE (*use->op_p));
4114 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4115 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4117 else
4118 type = base_type;
4120 return type;
4123 /* Determines the expression by which USE is expressed from induction variable
4124 CAND at statement AT in LOOP. The computation is unshared. */
4126 static tree
4127 get_computation_at (class loop *loop, gimple *at,
4128 struct iv_use *use, struct iv_cand *cand)
4130 aff_tree aff;
4131 tree type = get_use_type (use);
4133 if (!get_computation_aff (loop, at, use, cand, &aff))
4134 return NULL_TREE;
4135 unshare_aff_combination (&aff);
4136 return fold_convert (type, aff_combination_to_tree (&aff));
4139 /* Like get_computation_at, but try harder, even if the computation
4140 is more expensive. Intended for debug stmts. */
4142 static tree
4143 get_debug_computation_at (class loop *loop, gimple *at,
4144 struct iv_use *use, struct iv_cand *cand)
4146 if (tree ret = get_computation_at (loop, at, use, cand))
4147 return ret;
4149 tree ubase = use->iv->base, ustep = use->iv->step;
4150 tree cbase = cand->iv->base, cstep = cand->iv->step;
4151 tree var;
4152 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4153 widest_int rat;
4155 /* We must have a precision to express the values of use. */
4156 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4157 return NULL_TREE;
4159 /* Try to handle the case that get_computation_at doesn't,
4160 try to express
4161 use = ubase + (var - cbase) / ratio. */
4162 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4163 &rat))
4164 return NULL_TREE;
4166 bool neg_p = false;
4167 if (wi::neg_p (rat))
4169 if (TYPE_UNSIGNED (ctype))
4170 return NULL_TREE;
4171 neg_p = true;
4172 rat = wi::neg (rat);
4175 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4176 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4177 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4178 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4179 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4180 if (!use->iv->no_overflow
4181 && !cand->iv->no_overflow
4182 && !integer_pow2p (cstep))
4183 return NULL_TREE;
4185 int bits = wi::exact_log2 (rat);
4186 if (bits == -1)
4187 bits = wi::floor_log2 (rat) + 1;
4188 if (!cand->iv->no_overflow
4189 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4190 return NULL_TREE;
4192 var = var_at_stmt (loop, cand, at);
4194 if (POINTER_TYPE_P (ctype))
4196 ctype = unsigned_type_for (ctype);
4197 cbase = fold_convert (ctype, cbase);
4198 cstep = fold_convert (ctype, cstep);
4199 var = fold_convert (ctype, var);
4202 if (stmt_after_increment (loop, cand, at))
4203 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4204 unshare_expr (cstep));
4206 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4207 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4208 wide_int_to_tree (TREE_TYPE (var), rat));
4209 if (POINTER_TYPE_P (utype))
4211 var = fold_convert (sizetype, var);
4212 if (neg_p)
4213 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4214 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4216 else
4218 var = fold_convert (utype, var);
4219 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4220 ubase, var);
4222 return var;
4225 /* Adjust the cost COST for being in loop setup rather than loop body.
4226 If we're optimizing for space, the loop setup overhead is constant;
4227 if we're optimizing for speed, amortize it over the per-iteration cost.
4228 If ROUND_UP_P is true, the result is rounded up rather than truncated
4229 toward zero when optimizing for speed.  */
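/* A hypothetical illustration: with an average of 10 iterations and a setup
   cost of 25, the speed-amortized cost is 25 / 10 = 2 when truncating, or
   (25 + 10 - 1) / 10 = 3 when ROUND_UP_P is true.  */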
4230 static int64_t
4231 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4232 bool round_up_p = false)
4234 if (cost == INFTY)
4235 return cost;
4236 else if (optimize_loop_for_speed_p (data->current_loop))
4238 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4239 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4241 else
4242 return cost;
4245 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4246 EXPR operand holding the shift. COST0 and COST1 are the costs for
4247 calculating the operands of EXPR. Returns true if successful, and returns
4248 the cost in COST. */
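/* For example, on targets with a cheap shift-and-add instruction (x86 LEA
   with a scaled index, or an AArch64 add with a shifted register operand),
   an expression like b + a * 8 can be a single instruction rather than a
   separate shift followed by an add.  */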
4250 static bool
4251 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4252 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4254 comp_cost res;
4255 tree op1 = TREE_OPERAND (expr, 1);
4256 tree cst = TREE_OPERAND (mult, 1);
4257 tree multop = TREE_OPERAND (mult, 0);
4258 int m = exact_log2 (int_cst_value (cst));
4259 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4260 int as_cost, sa_cost;
4261 bool mult_in_op1;
4263 if (!(m >= 0 && m < maxm))
4264 return false;
4266 STRIP_NOPS (op1);
4267 mult_in_op1 = operand_equal_p (op1, mult, 0);
4269 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4271 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4272 use that in preference to a shift insn followed by an add insn. */
4273 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4274 ? shiftadd_cost (speed, mode, m)
4275 : (mult_in_op1
4276 ? shiftsub1_cost (speed, mode, m)
4277 : shiftsub0_cost (speed, mode, m)));
4279 res = comp_cost (MIN (as_cost, sa_cost), 0);
4280 res += (mult_in_op1 ? cost0 : cost1);
4282 STRIP_NOPS (multop);
4283 if (!is_gimple_val (multop))
4284 res += force_expr_to_var_cost (multop, speed);
4286 *cost = res;
4287 return true;
4290 /* Estimates cost of forcing expression EXPR into a variable. */
4292 static comp_cost
4293 force_expr_to_var_cost (tree expr, bool speed)
4295 static bool costs_initialized = false;
4296 static unsigned integer_cost [2];
4297 static unsigned symbol_cost [2];
4298 static unsigned address_cost [2];
4299 tree op0, op1;
4300 comp_cost cost0, cost1, cost;
4301 machine_mode mode;
4302 scalar_int_mode int_mode;
4304 if (!costs_initialized)
4306 tree type = build_pointer_type (integer_type_node);
4307 tree var, addr;
4308 rtx x;
4309 int i;
4311 var = create_tmp_var_raw (integer_type_node, "test_var");
4312 TREE_STATIC (var) = 1;
4313 x = produce_memory_decl_rtl (var, NULL);
4314 SET_DECL_RTL (var, x);
4316 addr = build1 (ADDR_EXPR, type, var);
4319 for (i = 0; i < 2; i++)
4321 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4322 2000), i);
4324 symbol_cost[i] = computation_cost (addr, i) + 1;
4326 address_cost[i]
4327 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4328 if (dump_file && (dump_flags & TDF_DETAILS))
4330 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4331 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4332 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4333 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4334 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4335 fprintf (dump_file, "\n");
4339 costs_initialized = true;
4342 STRIP_NOPS (expr);
4344 if (SSA_VAR_P (expr))
4345 return no_cost;
4347 if (is_gimple_min_invariant (expr))
4349 if (poly_int_tree_p (expr))
4350 return comp_cost (integer_cost [speed], 0);
4352 if (TREE_CODE (expr) == ADDR_EXPR)
4354 tree obj = TREE_OPERAND (expr, 0);
4356 if (VAR_P (obj)
4357 || TREE_CODE (obj) == PARM_DECL
4358 || TREE_CODE (obj) == RESULT_DECL)
4359 return comp_cost (symbol_cost [speed], 0);
4362 return comp_cost (address_cost [speed], 0);
4365 switch (TREE_CODE (expr))
4367 case POINTER_PLUS_EXPR:
4368 case PLUS_EXPR:
4369 case MINUS_EXPR:
4370 case MULT_EXPR:
4371 case EXACT_DIV_EXPR:
4372 case TRUNC_DIV_EXPR:
4373 case BIT_AND_EXPR:
4374 case BIT_IOR_EXPR:
4375 case LSHIFT_EXPR:
4376 case RSHIFT_EXPR:
4377 op0 = TREE_OPERAND (expr, 0);
4378 op1 = TREE_OPERAND (expr, 1);
4379 STRIP_NOPS (op0);
4380 STRIP_NOPS (op1);
4381 break;
4383 CASE_CONVERT:
4384 case NEGATE_EXPR:
4385 case BIT_NOT_EXPR:
4386 op0 = TREE_OPERAND (expr, 0);
4387 STRIP_NOPS (op0);
4388 op1 = NULL_TREE;
4389 break;
4390 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4391 introduce a COND_EXPR for the IV base, so COND_EXPR and tcc_comparison
4392 codes need cost estimation support here; the estimates could be improved.  */
4393 case COND_EXPR:
4394 op0 = TREE_OPERAND (expr, 1);
4395 STRIP_NOPS (op0);
4396 op1 = TREE_OPERAND (expr, 2);
4397 STRIP_NOPS (op1);
4398 break;
4399 case LT_EXPR:
4400 case LE_EXPR:
4401 case GT_EXPR:
4402 case GE_EXPR:
4403 case EQ_EXPR:
4404 case NE_EXPR:
4405 case UNORDERED_EXPR:
4406 case ORDERED_EXPR:
4407 case UNLT_EXPR:
4408 case UNLE_EXPR:
4409 case UNGT_EXPR:
4410 case UNGE_EXPR:
4411 case UNEQ_EXPR:
4412 case LTGT_EXPR:
4413 case MAX_EXPR:
4414 case MIN_EXPR:
4415 op0 = TREE_OPERAND (expr, 0);
4416 STRIP_NOPS (op0);
4417 op1 = TREE_OPERAND (expr, 1);
4418 STRIP_NOPS (op1);
4419 break;
4421 default:
4422 /* Just an arbitrary value, FIXME. */
4423 return comp_cost (target_spill_cost[speed], 0);
4426 if (op0 == NULL_TREE
4427 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4428 cost0 = no_cost;
4429 else
4430 cost0 = force_expr_to_var_cost (op0, speed);
4432 if (op1 == NULL_TREE
4433 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4434 cost1 = no_cost;
4435 else
4436 cost1 = force_expr_to_var_cost (op1, speed);
4438 mode = TYPE_MODE (TREE_TYPE (expr));
4439 switch (TREE_CODE (expr))
4441 case POINTER_PLUS_EXPR:
4442 case PLUS_EXPR:
4443 case MINUS_EXPR:
4444 case NEGATE_EXPR:
4445 cost = comp_cost (add_cost (speed, mode), 0);
4446 if (TREE_CODE (expr) != NEGATE_EXPR)
4448 tree mult = NULL_TREE;
4449 comp_cost sa_cost;
4450 if (TREE_CODE (op1) == MULT_EXPR)
4451 mult = op1;
4452 else if (TREE_CODE (op0) == MULT_EXPR)
4453 mult = op0;
4455 if (mult != NULL_TREE
4456 && is_a <scalar_int_mode> (mode, &int_mode)
4457 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4458 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4459 speed, &sa_cost))
4460 return sa_cost;
4462 break;
4464 CASE_CONVERT:
4466 tree inner_mode, outer_mode;
4467 outer_mode = TREE_TYPE (expr);
4468 inner_mode = TREE_TYPE (op0);
4469 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4470 TYPE_MODE (inner_mode), speed), 0);
4472 break;
4474 case MULT_EXPR:
4475 if (cst_and_fits_in_hwi (op0))
4476 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4477 mode, speed), 0);
4478 else if (cst_and_fits_in_hwi (op1))
4479 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4480 mode, speed), 0);
4481 else
4482 return comp_cost (target_spill_cost [speed], 0);
4483 break;
4485 case EXACT_DIV_EXPR:
4486 case TRUNC_DIV_EXPR:
4487 /* Division by power of two is usually cheap, so we allow it. Forbid
4488 anything else. */
4489 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4490 cost = comp_cost (add_cost (speed, mode), 0);
4491 else
4492 cost = comp_cost (target_spill_cost[speed], 0);
4493 break;
4495 case BIT_AND_EXPR:
4496 case BIT_IOR_EXPR:
4497 case BIT_NOT_EXPR:
4498 case LSHIFT_EXPR:
4499 case RSHIFT_EXPR:
4500 cost = comp_cost (add_cost (speed, mode), 0);
4501 break;
4502 case COND_EXPR:
4503 op0 = TREE_OPERAND (expr, 0);
4504 STRIP_NOPS (op0);
4505 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4506 || CONSTANT_CLASS_P (op0))
4507 cost = no_cost;
4508 else
4509 cost = force_expr_to_var_cost (op0, speed);
4510 break;
4511 case LT_EXPR:
4512 case LE_EXPR:
4513 case GT_EXPR:
4514 case GE_EXPR:
4515 case EQ_EXPR:
4516 case NE_EXPR:
4517 case UNORDERED_EXPR:
4518 case ORDERED_EXPR:
4519 case UNLT_EXPR:
4520 case UNLE_EXPR:
4521 case UNGT_EXPR:
4522 case UNGE_EXPR:
4523 case UNEQ_EXPR:
4524 case LTGT_EXPR:
4525 case MAX_EXPR:
4526 case MIN_EXPR:
4527 /* Simply use the add cost for now; FIXME: use a more accurate cost
4528 evaluation if one becomes available.  */
4529 cost = comp_cost (add_cost (speed, mode), 0);
4530 break;
4532 default:
4533 gcc_unreachable ();
4536 cost += cost0;
4537 cost += cost1;
4538 return cost;
4541 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4542 invariants the computation depends on. */
4544 static comp_cost
4545 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4547 if (!expr)
4548 return no_cost;
4550 find_inv_vars (data, &expr, inv_vars);
4551 return force_expr_to_var_cost (expr, data->speed);
4554 /* Returns cost of auto-modifying address expression in shape base + offset.
4555 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4556 address expression. The address expression has ADDR_MODE in addr space
4557 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4558 speed or size. */
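/* For example, a load from *p followed by advancing p by the access size
   corresponds to a post-increment address, while advancing p first and then
   loading corresponds to a pre-increment address; the matching target costs
   are looked up below.  */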
4560 enum ainc_type
4562 AINC_PRE_INC, /* Pre increment. */
4563 AINC_PRE_DEC, /* Pre decrement. */
4564 AINC_POST_INC, /* Post increment. */
4565 AINC_POST_DEC, /* Post decrement. */
4566 AINC_NONE /* Also the number of auto increment types. */
4569 struct ainc_cost_data
4571 int64_t costs[AINC_NONE];
4574 static comp_cost
4575 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4576 machine_mode addr_mode, machine_mode mem_mode,
4577 addr_space_t as, bool speed)
4579 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4580 && !USE_STORE_PRE_DECREMENT (mem_mode)
4581 && !USE_LOAD_POST_DECREMENT (mem_mode)
4582 && !USE_STORE_POST_DECREMENT (mem_mode)
4583 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4584 && !USE_STORE_PRE_INCREMENT (mem_mode)
4585 && !USE_LOAD_POST_INCREMENT (mem_mode)
4586 && !USE_STORE_POST_INCREMENT (mem_mode))
4587 return infinite_cost;
4589 static vec<ainc_cost_data *> ainc_cost_data_list;
4590 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4591 if (idx >= ainc_cost_data_list.length ())
4593 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4595 gcc_assert (nsize > idx);
4596 ainc_cost_data_list.safe_grow_cleared (nsize, true);
4599 ainc_cost_data *data = ainc_cost_data_list[idx];
4600 if (data == NULL)
4602 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4604 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4605 data->costs[AINC_PRE_DEC] = INFTY;
4606 data->costs[AINC_POST_DEC] = INFTY;
4607 data->costs[AINC_PRE_INC] = INFTY;
4608 data->costs[AINC_POST_INC] = INFTY;
4609 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4610 || USE_STORE_PRE_DECREMENT (mem_mode))
4612 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4614 if (memory_address_addr_space_p (mem_mode, addr, as))
4615 data->costs[AINC_PRE_DEC]
4616 = address_cost (addr, mem_mode, as, speed);
4618 if (USE_LOAD_POST_DECREMENT (mem_mode)
4619 || USE_STORE_POST_DECREMENT (mem_mode))
4621 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4623 if (memory_address_addr_space_p (mem_mode, addr, as))
4624 data->costs[AINC_POST_DEC]
4625 = address_cost (addr, mem_mode, as, speed);
4627 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4628 || USE_STORE_PRE_INCREMENT (mem_mode))
4630 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4632 if (memory_address_addr_space_p (mem_mode, addr, as))
4633 data->costs[AINC_PRE_INC]
4634 = address_cost (addr, mem_mode, as, speed);
4636 if (USE_LOAD_POST_INCREMENT (mem_mode)
4637 || USE_STORE_POST_INCREMENT (mem_mode))
4639 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4641 if (memory_address_addr_space_p (mem_mode, addr, as))
4642 data->costs[AINC_POST_INC]
4643 = address_cost (addr, mem_mode, as, speed);
4645 ainc_cost_data_list[idx] = data;
4648 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4649 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4650 return comp_cost (data->costs[AINC_POST_INC], 0);
4651 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4652 return comp_cost (data->costs[AINC_POST_DEC], 0);
4653 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4654 return comp_cost (data->costs[AINC_PRE_INC], 0);
4655 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4656 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4658 return infinite_cost;
4661 /* Return cost of computing USE's address expression by using CAND.
4662 AFF_INV and AFF_VAR represent invariant and variant parts of the
4663 address expression, respectively. If AFF_INV is simple, store
4664 the loop invariant variables on which it depends in INV_VARS;
4665 if AFF_INV is complicated, handle it as a new invariant expression
4666 and record it in INV_EXPR.  RATIO is the ratio between the
4667 steps of USE and CAND.  If CAN_AUTOINC is non-NULL, store a boolean
4668 value to it indicating if this is an auto-increment address. */
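/* As a rough illustration, for a use like a[i] with a loop-invariant array a
   and candidate i, the variant part is the candidate scaled by the element
   size and the invariant part is the base address; on targets that support a
   "base + index << scale" addressing mode the whole expression can be encoded
   in the memory operand, so only the address cost is paid.  */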
4670 static comp_cost
4671 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4672 struct iv_cand *cand, aff_tree *aff_inv,
4673 aff_tree *aff_var, HOST_WIDE_INT ratio,
4674 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4675 bool *can_autoinc, bool speed)
4677 rtx addr;
4678 bool simple_inv = true;
4679 tree comp_inv = NULL_TREE, type = aff_var->type;
4680 comp_cost var_cost = no_cost, cost = no_cost;
4681 struct mem_address parts = {NULL_TREE, integer_one_node,
4682 NULL_TREE, NULL_TREE, NULL_TREE};
4683 machine_mode addr_mode = TYPE_MODE (type);
4684 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4685 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4686 /* Only true if ratio != 1. */
4687 bool ok_with_ratio_p = false;
4688 bool ok_without_ratio_p = false;
4689 code_helper code = ERROR_MARK;
4691 if (use->type == USE_PTR_ADDRESS)
4693 gcall *call = as_a<gcall *> (use->stmt);
4694 gcc_assert (gimple_call_internal_p (call));
4695 code = gimple_call_internal_fn (call);
4698 if (!aff_combination_const_p (aff_inv))
4700 parts.index = integer_one_node;
4701 /* Addressing mode "base + index". */
4702 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4703 if (ratio != 1)
4705 parts.step = wide_int_to_tree (type, ratio);
4706 /* Addressing mode "base + index << scale". */
4707 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4708 if (!ok_with_ratio_p)
4709 parts.step = NULL_TREE;
4711 if (ok_with_ratio_p || ok_without_ratio_p)
4713 if (maybe_ne (aff_inv->offset, 0))
4715 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4716 /* Addressing mode "base + index [<< scale] + offset". */
4717 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4718 parts.offset = NULL_TREE;
4719 else
4720 aff_inv->offset = 0;
4723 move_fixed_address_to_symbol (&parts, aff_inv);
4724 /* Base is fixed address and is moved to symbol part. */
4725 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4726 parts.base = NULL_TREE;
4728 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4729 if (parts.symbol != NULL_TREE
4730 && !valid_mem_ref_p (mem_mode, as, &parts, code))
4732 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4733 parts.symbol = NULL_TREE;
4734 /* Reset SIMPLE_INV since symbol address needs to be computed
4735 outside of address expression in this case. */
4736 simple_inv = false;
4737 /* Symbol part is moved back to the base part; it can't be NULL.  */
4738 parts.base = integer_one_node;
4741 else
4742 parts.index = NULL_TREE;
4744 else
4746 poly_int64 ainc_step;
4747 if (can_autoinc
4748 && ratio == 1
4749 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4751 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4753 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4754 ainc_offset += ainc_step;
4755 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4756 addr_mode, mem_mode, as, speed);
4757 if (!cost.infinite_cost_p ())
4759 *can_autoinc = true;
4760 return cost;
4762 cost = no_cost;
4764 if (!aff_combination_zero_p (aff_inv))
4766 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4767 /* Addressing mode "base + offset". */
4768 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4769 parts.offset = NULL_TREE;
4770 else
4771 aff_inv->offset = 0;
4775 if (simple_inv)
4776 simple_inv = (aff_inv == NULL
4777 || aff_combination_const_p (aff_inv)
4778 || aff_combination_singleton_var_p (aff_inv));
4779 if (!aff_combination_zero_p (aff_inv))
4780 comp_inv = aff_combination_to_tree (aff_inv);
4781 if (comp_inv != NULL_TREE)
4782 cost = force_var_cost (data, comp_inv, inv_vars);
4783 if (ratio != 1 && parts.step == NULL_TREE)
4784 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4785 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4786 var_cost += add_cost (speed, addr_mode);
4788 if (comp_inv && inv_expr && !simple_inv)
4790 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4791 /* Clear depends on. */
4792 if (*inv_expr != NULL && inv_vars && *inv_vars)
4793 bitmap_clear (*inv_vars);
4795 /* The cost of a small invariant expression adjusted against loop niters
4796 is usually zero, which makes it hard to differentiate from a candidate
4797 based on loop invariant variables.  Secondly, the generated invariant
4798 expression may not be hoisted out of the loop by a following pass.  We
4799 penalize the cost by rounding up in order to
4800 neutralize such effects.  */
4801 cost.cost = adjust_setup_cost (data, cost.cost, true);
4802 cost.scratch = cost.cost;
4805 cost += var_cost;
4806 addr = addr_for_mem_ref (&parts, as, false);
4807 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4808 cost += address_cost (addr, mem_mode, as, speed);
4810 if (parts.symbol != NULL_TREE)
4811 cost.complexity += 1;
4812 /* Don't increase the complexity of adding a scaled index if it's
4813 the only kind of index that the target allows. */
4814 if (parts.step != NULL_TREE && ok_without_ratio_p)
4815 cost.complexity += 1;
4816 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4817 cost.complexity += 1;
4818 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4819 cost.complexity += 1;
4821 return cost;
4824 /* Scale (multiply) the computed COST (except the scratch part, which should
4825 be hoisted out of the loop) by header->frequency / AT->frequency, which
4826 makes the expected cost more accurate.  */
4828 static comp_cost
4829 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4831 if (data->speed
4832 && data->current_loop->header->count.to_frequency (cfun) > 0)
4834 basic_block bb = gimple_bb (at);
4835 gcc_assert (cost.scratch <= cost.cost);
4836 int scale_factor = (int)(intptr_t) bb->aux;
4837 if (scale_factor == 1)
4838 return cost;
4840 int64_t scaled_cost
4841 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4843 if (dump_file && (dump_flags & TDF_DETAILS))
4844 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4845 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4846 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4848 cost.cost = scaled_cost;
4851 return cost;
4854 /* Determines the cost of the computation by which USE is expressed
4855 from induction variable CAND.  If ADDRESS_P is true, we just need
4856 to create an address from it, otherwise we want to get it into a
4857 register.  A set of invariants we depend on is stored in INV_VARS.
4858 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4859 addressing is likely. If INV_EXPR is nonnull, record invariant
4860 expr entry in it. */
4862 static comp_cost
4863 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4864 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4865 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4867 gimple *at = use->stmt;
4868 tree ubase = use->iv->base, cbase = cand->iv->base;
4869 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4870 tree comp_inv = NULL_TREE;
4871 HOST_WIDE_INT ratio, aratio;
4872 comp_cost cost;
4873 widest_int rat;
4874 aff_tree aff_inv, aff_var;
4875 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4877 if (inv_vars)
4878 *inv_vars = NULL;
4879 if (can_autoinc)
4880 *can_autoinc = false;
4881 if (inv_expr)
4882 *inv_expr = NULL;
4884 /* Check if we have enough precision to express the values of use. */
4885 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4886 return infinite_cost;
4888 if (address_p
4889 || (use->iv->base_object
4890 && cand->iv->base_object
4891 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4892 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4894 /* Do not try to express address of an object with computation based
4895 on address of a different object. This may cause problems in rtl
4896 level alias analysis (that does not expect this to be happening,
4897 as this is illegal in C), and would be unlikely to be useful
4898 anyway. */
4899 if (use->iv->base_object
4900 && cand->iv->base_object
4901 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4902 return infinite_cost;
4905 if (!get_computation_aff_1 (data->current_loop, at, use,
4906 cand, &aff_inv, &aff_var, &rat)
4907 || !wi::fits_shwi_p (rat))
4908 return infinite_cost;
4910 ratio = rat.to_shwi ();
4911 if (address_p)
4913 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4914 inv_vars, inv_expr, can_autoinc, speed);
4915 cost = get_scaled_computation_cost_at (data, at, cost);
4916 /* For doloop IV cand, add on the extra cost. */
4917 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4918 return cost;
4921 bool simple_inv = (aff_combination_const_p (&aff_inv)
4922 || aff_combination_singleton_var_p (&aff_inv));
4923 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4924 aff_combination_convert (&aff_inv, signed_type);
4925 if (!aff_combination_zero_p (&aff_inv))
4926 comp_inv = aff_combination_to_tree (&aff_inv);
4928 cost = force_var_cost (data, comp_inv, inv_vars);
4929 if (comp_inv && inv_expr && !simple_inv)
4931 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4932 /* Clear depends on. */
4933 if (*inv_expr != NULL && inv_vars && *inv_vars)
4934 bitmap_clear (*inv_vars);
4936 cost.cost = adjust_setup_cost (data, cost.cost);
4937 /* Record setup cost in scratch field. */
4938 cost.scratch = cost.cost;
4940 /* Cost of constant integer can be covered when adding invariant part to
4941 variant part. */
4942 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4943 cost = no_cost;
4945 /* Need type narrowing to represent use with cand. */
4946 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4948 machine_mode outer_mode = TYPE_MODE (utype);
4949 machine_mode inner_mode = TYPE_MODE (ctype);
4950 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4953 /* Turn a + i * (-c) into a - i * c. */
4954 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4955 aratio = -ratio;
4956 else
4957 aratio = ratio;
4959 if (ratio != 1)
4960 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4962 /* TODO: We may also need to check if we can compute a + i * 4 in one
4963 instruction. */
4964 /* Need to add up the invariant and variant parts. */
4965 if (comp_inv && !integer_zerop (comp_inv))
4966 cost += add_cost (speed, TYPE_MODE (utype));
4968 cost = get_scaled_computation_cost_at (data, at, cost);
4970 /* For doloop IV cand, add on the extra cost. */
4971 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4972 cost += targetm.doloop_cost_for_generic;
4974 return cost;
4977 /* Determines cost of computing the use in GROUP with CAND in a generic
4978 expression. */
4980 static bool
4981 determine_group_iv_cost_generic (struct ivopts_data *data,
4982 struct iv_group *group, struct iv_cand *cand)
4984 comp_cost cost;
4985 iv_inv_expr_ent *inv_expr = NULL;
4986 bitmap inv_vars = NULL, inv_exprs = NULL;
4987 struct iv_use *use = group->vuses[0];
4989 /* The simple case first -- if we need to express value of the preserved
4990 original biv, the cost is 0. This also prevents us from counting the
4991 cost of increment twice -- once at this use and once in the cost of
4992 the candidate. */
4993 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4994 cost = no_cost;
4995 /* If the IV candidate involves undefined SSA values and is not the
4996 same IV as on the USE avoid using that candidate here. */
4997 else if (cand->involves_undefs
4998 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4999 return false;
5000 else
5001 cost = get_computation_cost (data, use, cand, false,
5002 &inv_vars, NULL, &inv_expr);
5004 if (inv_expr)
5006 inv_exprs = BITMAP_ALLOC (NULL);
5007 bitmap_set_bit (inv_exprs, inv_expr->id);
5009 set_group_iv_cost (data, group, cand, cost, inv_vars,
5010 NULL_TREE, ERROR_MARK, inv_exprs);
5011 return !cost.infinite_cost_p ();
5014 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5016 static bool
5017 determine_group_iv_cost_address (struct ivopts_data *data,
5018 struct iv_group *group, struct iv_cand *cand)
5020 unsigned i;
5021 bitmap inv_vars = NULL, inv_exprs = NULL;
5022 bool can_autoinc;
5023 iv_inv_expr_ent *inv_expr = NULL;
5024 struct iv_use *use = group->vuses[0];
5025 comp_cost sum_cost = no_cost, cost;
5027 cost = get_computation_cost (data, use, cand, true,
5028 &inv_vars, &can_autoinc, &inv_expr);
5030 if (inv_expr)
5032 inv_exprs = BITMAP_ALLOC (NULL);
5033 bitmap_set_bit (inv_exprs, inv_expr->id);
5035 sum_cost = cost;
5036 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5038 if (can_autoinc)
5039 sum_cost -= cand->cost_step;
5040 /* If we generated the candidate solely for exploiting autoincrement
5041 opportunities, and it turns out it can't be used, set the cost to
5042 infinity to make sure we ignore it. */
5043 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5044 sum_cost = infinite_cost;
5047 /* Uses in a group can share setup code, so only add setup cost once. */
5048 cost -= cost.scratch;
5049 /* Compute and add costs for the remaining uses of this group.  */
5050 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5052 struct iv_use *next = group->vuses[i];
5054 /* TODO: We could skip computing cost for sub iv_use when it has the
5055 same cost as the first iv_use, but the cost really depends on the
5056 offset and where the iv_use is. */
5057 cost = get_computation_cost (data, next, cand, true,
5058 NULL, &can_autoinc, &inv_expr);
5059 if (inv_expr)
5061 if (!inv_exprs)
5062 inv_exprs = BITMAP_ALLOC (NULL);
5064 bitmap_set_bit (inv_exprs, inv_expr->id);
5066 sum_cost += cost;
5068 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5069 NULL_TREE, ERROR_MARK, inv_exprs);
5071 return !sum_cost.infinite_cost_p ();
5074 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5075 and stores it to VAL. */
5077 static void
5078 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5079 class tree_niter_desc *desc, aff_tree *val)
5081 aff_tree step, delta, nit;
5082 struct iv *iv = cand->iv;
5083 tree type = TREE_TYPE (iv->base);
5084 tree niter = desc->niter;
5085 bool after_adjust = stmt_after_increment (loop, cand, at);
5086 tree steptype;
5088 if (POINTER_TYPE_P (type))
5089 steptype = sizetype;
5090 else
5091 steptype = unsigned_type_for (type);
5093 /* If AFTER_ADJUST is required, the code below generates the equivalent
5094 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5095 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5096 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5097 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5098 class for common idioms that we know are safe. */
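  /* For instance, if the control IV starts at 1, steps by 1 and the exit
     test is i < n, then NITER is n - 1 and the code below can use the bound
     n directly, computing BASE + n * STEP rather than
     BASE + (n - 1) * STEP + STEP, which folds better.  */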
5099 if (after_adjust
5100 && desc->control.no_overflow
5101 && integer_onep (desc->control.step)
5102 && (desc->cmp == LT_EXPR
5103 || desc->cmp == NE_EXPR)
5104 && TREE_CODE (desc->bound) == SSA_NAME)
5106 if (integer_onep (desc->control.base))
5108 niter = desc->bound;
5109 after_adjust = false;
5111 else if (TREE_CODE (niter) == MINUS_EXPR
5112 && integer_onep (TREE_OPERAND (niter, 1)))
5114 niter = TREE_OPERAND (niter, 0);
5115 after_adjust = false;
5119 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5120 aff_combination_convert (&step, steptype);
5121 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5122 aff_combination_convert (&nit, steptype);
5123 aff_combination_mult (&nit, &step, &delta);
5124 if (after_adjust)
5125 aff_combination_add (&delta, &step);
5127 tree_to_aff_combination (iv->base, type, val);
5128 if (!POINTER_TYPE_P (type))
5129 aff_combination_convert (val, steptype);
5130 aff_combination_add (val, &delta);
5133 /* Returns period of induction variable iv. */
5135 static tree
5136 iv_period (struct iv *iv)
5138 tree step = iv->step, period, type;
5139 tree pow2div;
5141 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5143 type = unsigned_type_for (TREE_TYPE (step));
5144 /* The period of the iv is lcm (step, type_range) / step - 1,
5145 i.e., N * type_range / step - 1.  Since type_range is a power
5146 of two, N == step >> num_of_ending_zeros_binary (step),
5147 so the final result is
5149 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
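  /* E.g. for a 16-bit unsigned type and step 8 (three trailing zero bits),
     the period is (65536 >> 3) - 1 = 8191.  */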
5152 pow2div = num_ending_zeros (step);
5154 period = build_low_bits_mask (type,
5155 (TYPE_PRECISION (type)
5156 - tree_to_uhwi (pow2div)));
5158 return period;
5161 /* Returns the comparison operator used when eliminating the iv USE. */
5163 static enum tree_code
5164 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5166 class loop *loop = data->current_loop;
5167 basic_block ex_bb;
5168 edge exit;
5170 ex_bb = gimple_bb (use->stmt);
5171 exit = EDGE_SUCC (ex_bb, 0);
5172 if (flow_bb_inside_loop_p (loop, exit->dest))
5173 exit = EDGE_SUCC (ex_bb, 1);
5175 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5178 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5179 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5180 calculation is performed in non-wrapping type.
5182 TODO: More generally, we could test for the situation that
5183 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5184 This would require knowing the sign of OFFSET. */
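/* For instance, if BASE is defined as s + OFFSET and the addition is done in
   a non-wrapping type, then BASE - OFFSET is simply s, so the subtraction
   cannot overflow.  */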
5186 static bool
5187 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5189 enum tree_code code;
5190 tree e1, e2;
5191 aff_tree aff_e1, aff_e2, aff_offset;
5193 if (!nowrap_type_p (TREE_TYPE (base)))
5194 return false;
5196 base = expand_simple_operations (base);
5198 if (TREE_CODE (base) == SSA_NAME)
5200 gimple *stmt = SSA_NAME_DEF_STMT (base);
5202 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5203 return false;
5205 code = gimple_assign_rhs_code (stmt);
5206 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5207 return false;
5209 e1 = gimple_assign_rhs1 (stmt);
5210 e2 = gimple_assign_rhs2 (stmt);
5212 else
5214 code = TREE_CODE (base);
5215 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5216 return false;
5217 e1 = TREE_OPERAND (base, 0);
5218 e2 = TREE_OPERAND (base, 1);
5221 /* Use affine expansion as deeper inspection to prove the equality. */
5222 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5223 &aff_e2, &data->name_expansion_cache);
5224 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5225 &aff_offset, &data->name_expansion_cache);
5226 aff_combination_scale (&aff_offset, -1);
5227 switch (code)
5229 case PLUS_EXPR:
5230 aff_combination_add (&aff_e2, &aff_offset);
5231 if (aff_combination_zero_p (&aff_e2))
5232 return true;
5234 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5235 &aff_e1, &data->name_expansion_cache);
5236 aff_combination_add (&aff_e1, &aff_offset);
5237 return aff_combination_zero_p (&aff_e1);
5239 case POINTER_PLUS_EXPR:
5240 aff_combination_add (&aff_e2, &aff_offset);
5241 return aff_combination_zero_p (&aff_e2);
5243 default:
5244 return false;
5248 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5249 comparison with CAND. NITER describes the number of iterations of
5250 the loops. If successful, the comparison in COMP_P is altered accordingly.
5252 We aim to handle the following situation:
5254 sometype *base, *p;
5255 int a, b, i;
5257 i = a;
5258 p = p_0 = base + a;
5262 bla (*p);
5263 p++;
5264 i++;
5266 while (i < b);
5268 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5269 We aim to optimize this to
5271 p = p_0 = base + a;
5274 bla (*p);
5275 p++;
5277 while (p < p_0 - a + b);
5279 This preserves the correctness, since the pointer arithmetics does not
5280 overflow. More precisely:
5282 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5283 overflow in computing it or the values of p.
5284 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5285 overflow. To prove this, we use the fact that p_0 = base + a. */
5287 static bool
5288 iv_elimination_compare_lt (struct ivopts_data *data,
5289 struct iv_cand *cand, enum tree_code *comp_p,
5290 class tree_niter_desc *niter)
5292 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5293 class aff_tree nit, tmpa, tmpb;
5294 enum tree_code comp;
5295 HOST_WIDE_INT step;
5297 /* We need to know that the candidate induction variable does not overflow.
5298 While more complex analysis may be used to prove this, for now just
5299 check that the variable appears in the original program and that it
5300 is computed in a type that guarantees no overflows. */
5301 cand_type = TREE_TYPE (cand->iv->base);
5302 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5303 return false;
5305 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5306 the calculation of the BOUND could overflow, making the comparison
5307 invalid. */
5308 if (!data->loop_single_exit_p)
5309 return false;
5311 /* We need to be able to decide whether candidate is increasing or decreasing
5312 in order to choose the right comparison operator. */
5313 if (!cst_and_fits_in_hwi (cand->iv->step))
5314 return false;
5315 step = int_cst_value (cand->iv->step);
5317 /* Check that the number of iterations matches the expected pattern:
5318 a + 1 > b ? 0 : b - a - 1. */
5319 mbz = niter->may_be_zero;
5320 if (TREE_CODE (mbz) == GT_EXPR)
5322 /* Handle a + 1 > b. */
5323 tree op0 = TREE_OPERAND (mbz, 0);
5324 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5326 a = TREE_OPERAND (op0, 0);
5327 b = TREE_OPERAND (mbz, 1);
5329 else
5330 return false;
5332 else if (TREE_CODE (mbz) == LT_EXPR)
5334 tree op1 = TREE_OPERAND (mbz, 1);
5336 /* Handle b < a + 1. */
5337 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5339 a = TREE_OPERAND (op1, 0);
5340 b = TREE_OPERAND (mbz, 0);
5342 else
5343 return false;
5345 else
5346 return false;
5348 /* Expected number of iterations is B - A - 1. Check that it matches
5349 the actual number, i.e., that B - A - NITER = 1. */
5350 tree_to_aff_combination (niter->niter, nit_type, &nit);
5351 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5352 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5353 aff_combination_scale (&nit, -1);
5354 aff_combination_scale (&tmpa, -1);
5355 aff_combination_add (&tmpb, &tmpa);
5356 aff_combination_add (&tmpb, &nit);
5357 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5358 return false;
5360 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5361 overflow. */
5362 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5363 cand->iv->step,
5364 fold_convert (TREE_TYPE (cand->iv->step), a));
5365 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5366 return false;
5368 /* Determine the new comparison operator. */
5369 comp = step < 0 ? GT_EXPR : LT_EXPR;
5370 if (*comp_p == NE_EXPR)
5371 *comp_p = comp;
5372 else if (*comp_p == EQ_EXPR)
5373 *comp_p = invert_tree_comparison (comp, false);
5374 else
5375 gcc_unreachable ();
5377 return true;
5380 /* Check whether it is possible to express the condition in USE by comparison
5381 of candidate CAND. If so, store the value compared with to BOUND, and the
5382 comparison operator to COMP. */
5384 static bool
5385 may_eliminate_iv (struct ivopts_data *data,
5386 struct iv_use *use, struct iv_cand *cand, tree *bound,
5387 enum tree_code *comp)
5389 basic_block ex_bb;
5390 edge exit;
5391 tree period;
5392 class loop *loop = data->current_loop;
5393 aff_tree bnd;
5394 class tree_niter_desc *desc = NULL;
5396 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5397 return false;
5399 /* For now works only for exits that dominate the loop latch.
5400 TODO: extend to other conditions inside loop body. */
5401 ex_bb = gimple_bb (use->stmt);
5402 if (use->stmt != last_nondebug_stmt (ex_bb)
5403 || gimple_code (use->stmt) != GIMPLE_COND
5404 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5405 return false;
5407 exit = EDGE_SUCC (ex_bb, 0);
5408 if (flow_bb_inside_loop_p (loop, exit->dest))
5409 exit = EDGE_SUCC (ex_bb, 1);
5410 if (flow_bb_inside_loop_p (loop, exit->dest))
5411 return false;
5413 desc = niter_for_exit (data, exit);
5414 if (!desc)
5415 return false;
5417 /* Determine whether we can use the variable to test the exit condition.
5418 This is the case iff the period of the induction variable is greater
5419 than the number of iterations for which the exit condition is true. */
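  /* E.g. an unsigned char candidate with step 1 has period 255 (see
     iv_period), so it can only replace the exit test of a loop known to
     iterate at most 255 times (one fewer if the use follows the
     increment).  */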
5420 period = iv_period (cand->iv);
5422 /* If the number of iterations is constant, compare against it directly. */
5423 if (TREE_CODE (desc->niter) == INTEGER_CST)
5425 /* See cand_value_at. */
5426 if (stmt_after_increment (loop, cand, use->stmt))
5428 if (!tree_int_cst_lt (desc->niter, period))
5429 return false;
5431 else
5433 if (tree_int_cst_lt (period, desc->niter))
5434 return false;
5438 /* If not, and if this is the only possible exit of the loop, see whether
5439 we can get a conservative estimate on the number of iterations of the
5440 entire loop and compare against that instead. */
5441 else
5443 widest_int period_value, max_niter;
5445 max_niter = desc->max;
5446 if (stmt_after_increment (loop, cand, use->stmt))
5447 max_niter += 1;
5448 period_value = wi::to_widest (period);
5449 if (wi::gtu_p (max_niter, period_value))
5451 /* See if we can take advantage of inferred loop bound
5452 information. */
5453 if (data->loop_single_exit_p)
5455 if (!max_loop_iterations (loop, &max_niter))
5456 return false;
5457 /* The loop bound is already adjusted by adding 1. */
5458 if (wi::gtu_p (max_niter, period_value))
5459 return false;
5461 else
5462 return false;
5466 /* For doloop IV cand, the bound would be zero. It's safe whether
5467 may_be_zero is set or not.  */
5468 if (cand->doloop_p)
5470 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5471 *comp = iv_elimination_compare (data, use);
5472 return true;
5475 cand_value_at (loop, cand, use->stmt, desc, &bnd);
5477 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5478 aff_combination_to_tree (&bnd));
5479 *comp = iv_elimination_compare (data, use);
5481 /* It is unlikely that computing the number of iterations using division
5482 would be more profitable than keeping the original induction variable. */
5483 bool cond_overflow_p;
5484 if (expression_expensive_p (*bound, &cond_overflow_p))
5485 return false;
5487 /* Sometimes it is possible to handle the situation that the number of
5488 iterations may be zero unless additional assumptions hold, by using <
5489 instead of != in the exit condition.
5491 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5492 base the exit condition on it. However, that is often too
5493 expensive. */
5494 if (!integer_zerop (desc->may_be_zero))
5495 return iv_elimination_compare_lt (data, cand, comp, desc);
5497 return true;
5500 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5501 be copied if it is used in the loop body and DATA->body_includes_call.  */
5503 static int
5504 parm_decl_cost (struct ivopts_data *data, tree bound)
5506 tree sbound = bound;
5507 STRIP_NOPS (sbound);
5509 if (TREE_CODE (sbound) == SSA_NAME
5510 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5511 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5512 && data->body_includes_call)
5513 return COSTS_N_INSNS (1);
5515 return 0;
5518 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5520 static bool
5521 determine_group_iv_cost_cond (struct ivopts_data *data,
5522 struct iv_group *group, struct iv_cand *cand)
5524 tree bound = NULL_TREE;
5525 struct iv *cmp_iv;
5526 bitmap inv_exprs = NULL;
5527 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5528 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5529 enum comp_iv_rewrite rewrite_type;
5530 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5531 tree *control_var, *bound_cst;
5532 enum tree_code comp = ERROR_MARK;
5533 struct iv_use *use = group->vuses[0];
5535 /* Extract condition operands. */
5536 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5537 &bound_cst, NULL, &cmp_iv);
5538 gcc_assert (rewrite_type != COMP_IV_NA);
5540 /* Try iv elimination. */
5541 if (rewrite_type == COMP_IV_ELIM
5542 && may_eliminate_iv (data, use, cand, &bound, &comp))
5544 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5545 if (elim_cost.cost == 0)
5546 elim_cost.cost = parm_decl_cost (data, bound);
5547 else if (TREE_CODE (bound) == INTEGER_CST)
5548 elim_cost.cost = 0;
5549 /* If we replace a loop condition 'i < n' with 'p < base + n',
5550 inv_vars_elim will have 'base' and 'n' set, which implies that both
5551 'base' and 'n' will be live during the loop. More likely,
5552 'base + n' will be loop invariant, resulting in only one live value
5553 during the loop. So in that case we clear inv_vars_elim and set
5554 inv_expr_elim instead. */
5555 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5557 inv_expr_elim = get_loop_invariant_expr (data, bound);
5558 bitmap_clear (inv_vars_elim);
5560 /* The bound is a loop invariant, so it will be only computed
5561 once. */
5562 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5565 /* When the condition is a comparison of the candidate IV against
5566 zero, prefer this IV.
5568 TODO: The constant that we're subtracting from the cost should
5569 be target-dependent. This information should be added to the
5570 target costs for each backend. */
5571 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5572 && integer_zerop (*bound_cst)
5573 && (operand_equal_p (*control_var, cand->var_after, 0)
5574 || operand_equal_p (*control_var, cand->var_before, 0)))
5575 elim_cost -= 1;
5577 express_cost = get_computation_cost (data, use, cand, false,
5578 &inv_vars_express, NULL,
5579 &inv_expr_express);
5580 if (cmp_iv != NULL)
5581 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5583 /* Count the cost of the original bound as well. */
5584 bound_cost = force_var_cost (data, *bound_cst, NULL);
5585 if (bound_cost.cost == 0)
5586 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5587 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5588 bound_cost.cost = 0;
5589 express_cost += bound_cost;
5591 /* Choose the better approach, preferring the eliminated IV. */
5592 if (elim_cost <= express_cost)
5594 cost = elim_cost;
5595 inv_vars = inv_vars_elim;
5596 inv_vars_elim = NULL;
5597 inv_expr = inv_expr_elim;
5598 /* For doloop candidate/use pair, adjust to zero cost. */
5599 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5600 cost = no_cost;
5602 else
5604 cost = express_cost;
5605 inv_vars = inv_vars_express;
5606 inv_vars_express = NULL;
5607 bound = NULL_TREE;
5608 comp = ERROR_MARK;
5609 inv_expr = inv_expr_express;
5612 if (inv_expr)
5614 inv_exprs = BITMAP_ALLOC (NULL);
5615 bitmap_set_bit (inv_exprs, inv_expr->id);
5617 set_group_iv_cost (data, group, cand, cost,
5618 inv_vars, bound, comp, inv_exprs);
5620 if (inv_vars_elim)
5621 BITMAP_FREE (inv_vars_elim);
5622 if (inv_vars_express)
5623 BITMAP_FREE (inv_vars_express);
5625 return !cost.infinite_cost_p ();
5628 /* Determines cost of computing uses in GROUP with CAND. Returns false
5629 if USE cannot be represented with CAND. */
5631 static bool
5632 determine_group_iv_cost (struct ivopts_data *data,
5633 struct iv_group *group, struct iv_cand *cand)
5635 switch (group->type)
5637 case USE_NONLINEAR_EXPR:
5638 return determine_group_iv_cost_generic (data, group, cand);
5640 case USE_REF_ADDRESS:
5641 case USE_PTR_ADDRESS:
5642 return determine_group_iv_cost_address (data, group, cand);
5644 case USE_COMPARE:
5645 return determine_group_iv_cost_cond (data, group, cand);
5647 default:
5648 gcc_unreachable ();
5652 /* Return true if get_computation_cost indicates that autoincrement is
5653 a possibility for the pair of USE and CAND, false otherwise. */
5655 static bool
5656 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5657 struct iv_cand *cand)
5659 if (!address_p (use->type))
5660 return false;
5662 bool can_autoinc = false;
5663 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5664 return can_autoinc;
5667 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5668 use that allows autoincrement, and set their AINC_USE if possible. */
5670 static void
5671 set_autoinc_for_original_candidates (struct ivopts_data *data)
5673 unsigned i, j;
5675 for (i = 0; i < data->vcands.length (); i++)
5677 struct iv_cand *cand = data->vcands[i];
5678 struct iv_use *closest_before = NULL;
5679 struct iv_use *closest_after = NULL;
5680 if (cand->pos != IP_ORIGINAL)
5681 continue;
5683 for (j = 0; j < data->vgroups.length (); j++)
5685 struct iv_group *group = data->vgroups[j];
5686 struct iv_use *use = group->vuses[0];
5687 unsigned uid = gimple_uid (use->stmt);
5689 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5690 continue;
5692 if (uid < gimple_uid (cand->incremented_at)
5693 && (closest_before == NULL
5694 || uid > gimple_uid (closest_before->stmt)))
5695 closest_before = use;
5697 if (uid > gimple_uid (cand->incremented_at)
5698 && (closest_after == NULL
5699 || uid < gimple_uid (closest_after->stmt)))
5700 closest_after = use;
5703 if (closest_before != NULL
5704 && autoinc_possible_for_pair (data, closest_before, cand))
5705 cand->ainc_use = closest_before;
5706 else if (closest_after != NULL
5707 && autoinc_possible_for_pair (data, closest_after, cand))
5708 cand->ainc_use = closest_after;
5712 /* Relate compare use with all candidates. */
5714 static void
5715 relate_compare_use_with_all_cands (struct ivopts_data *data)
5717 unsigned i, count = data->vcands.length ();
5718 for (i = 0; i < data->vgroups.length (); i++)
5720 struct iv_group *group = data->vgroups[i];
5722 if (group->type == USE_COMPARE)
5723 bitmap_set_range (group->related_cands, 0, count);
5727 /* If PREFERRED_MODE is suitable and profitable, use it to compute the
5728 doloop iv base from niter: base = niter + 1.  */
5730 static tree
5731 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5732 const widest_int &iterations_max)
5734 tree ntype = TREE_TYPE (niter);
5735 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5736 if (!pref_type)
5737 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5738 build_int_cst (ntype, 1));
5740 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5742 int prec = TYPE_PRECISION (ntype);
5743 int pref_prec = TYPE_PRECISION (pref_type);
5745 tree base;
5747 /* Check if PREFERRED_MODE is able to represent niter.  */
5748 if (pref_prec > prec
5749 || wi::ltu_p (iterations_max,
5750 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5751 UNSIGNED)))
5753 /* No wrap: it is safe to switch to the preferred type after computing niter + 1.  */
5754 if (wi::ltu_p (iterations_max,
5755 widest_int::from (wi::max_value (prec, UNSIGNED),
5756 UNSIGNED)))
5758 /* This could help to optimize the "-1 +1" pair when niter looks
5759 like "n-1": n is in the original mode.  "base = (n - 1) + 1"
5760 in PREFERRED_MODE could become base = (PREFERRED_TYPE)n.  */
5761 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5762 build_int_cst (ntype, 1));
5763 base = fold_convert (pref_type, base);
5766 /* To avoid wrap, convert niter to the preferred type before adding 1.  */
5767 else
5769 niter = fold_convert (pref_type, niter);
5770 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5771 build_int_cst (pref_type, 1));
5774 else
5775 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5776 build_int_cst (ntype, 1));
5777 return base;
5780 /* Add one doloop dedicated IV candidate:
5781 - Base is (may_be_zero ? 1 : (niter + 1)).
5782 - Step is -1. */
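/* E.g. for a loop whose niter expression evaluates to 99, this adds a
   candidate counting 100, 99, ..., 1, which a later RTL doloop pass can turn
   into a hardware count-register (decrement-and-branch) loop.  */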
5784 static void
5785 add_iv_candidate_for_doloop (struct ivopts_data *data)
5787 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5788 gcc_assert (niter_desc && niter_desc->assumptions);
5790 tree niter = niter_desc->niter;
5791 tree ntype = TREE_TYPE (niter);
5792 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5794 tree may_be_zero = niter_desc->may_be_zero;
5795 if (may_be_zero && integer_zerop (may_be_zero))
5796 may_be_zero = NULL_TREE;
5797 if (may_be_zero)
5799 if (COMPARISON_CLASS_P (may_be_zero))
5801 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5802 build_int_cst (ntype, 0),
5803 rewrite_to_non_trapping_overflow (niter));
5805 /* Don't try to obtain the iteration count expression when may_be_zero is
5806 integer_nonzerop (the iteration count is then actually one) or has any other non-comparison form.  */
5807 else
5808 return;
5811 machine_mode mode = TYPE_MODE (ntype);
5812 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5814 tree base;
5815 if (mode != pref_mode)
5817 base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5818 ntype = TREE_TYPE (base);
5820 else
5821 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5822 build_int_cst (ntype, 1));
5825 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
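/* Illustrative shape of the candidate added above (a sketch, not extra code):
   for a loop whose niter expression is n - 1, the doloop iv is roughly

     doloop_iv = may_be_zero ? 1 : n;   // base, i.e. may_be_zero ? 1 : niter + 1
     ...
     doloop_iv = doloop_iv - 1;         // step -1, once per iteration.  */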
5828 /* Finds the candidates for the induction variables. */
5830 static void
5831 find_iv_candidates (struct ivopts_data *data)
5833 /* Add commonly used ivs. */
5834 add_standard_iv_candidates (data);
5836 /* Add doloop dedicated ivs. */
5837 if (data->doloop_use_p)
5838 add_iv_candidate_for_doloop (data);
5840 /* Add old induction variables. */
5841 add_iv_candidate_for_bivs (data);
5843 /* Add induction variables derived from uses. */
5844 add_iv_candidate_for_groups (data);
5846 set_autoinc_for_original_candidates (data);
5848 /* Record the important candidates. */
5849 record_important_candidates (data);
5851 /* Relate compare iv_use with all candidates. */
5852 if (!data->consider_all_candidates)
5853 relate_compare_use_with_all_cands (data);
5855 if (dump_file && (dump_flags & TDF_DETAILS))
5857 unsigned i;
5859 fprintf (dump_file, "\n<Important Candidates>:\t");
5860 for (i = 0; i < data->vcands.length (); i++)
5861 if (data->vcands[i]->important)
5862 fprintf (dump_file, " %d,", data->vcands[i]->id);
5863 fprintf (dump_file, "\n");
5865 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5866 for (i = 0; i < data->vgroups.length (); i++)
5868 struct iv_group *group = data->vgroups[i];
5870 if (group->related_cands)
5872 fprintf (dump_file, " Group %d:\t", group->id);
5873 dump_bitmap (dump_file, group->related_cands);
5876 fprintf (dump_file, "\n");
5880 /* Determines the cost of computing each group of uses with each iv candidate. */
5882 static void
5883 determine_group_iv_costs (struct ivopts_data *data)
5885 unsigned i, j;
5886 struct iv_cand *cand;
5887 struct iv_group *group;
5888 bitmap to_clear = BITMAP_ALLOC (NULL);
5890 alloc_use_cost_map (data);
5892 for (i = 0; i < data->vgroups.length (); i++)
5894 group = data->vgroups[i];
5896 if (data->consider_all_candidates)
5898 for (j = 0; j < data->vcands.length (); j++)
5900 cand = data->vcands[j];
5901 determine_group_iv_cost (data, group, cand);
5904 else
5906 bitmap_iterator bi;
5908 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5910 cand = data->vcands[j];
5911 if (!determine_group_iv_cost (data, group, cand))
5912 bitmap_set_bit (to_clear, j);
5915 /* Remove the candidates for which the cost is infinite from
5916 the list of related candidates. */
5917 bitmap_and_compl_into (group->related_cands, to_clear);
5918 bitmap_clear (to_clear);
5922 BITMAP_FREE (to_clear);
5924 if (dump_file && (dump_flags & TDF_DETAILS))
5926 bitmap_iterator bi;
5928 /* Dump invariant variables. */
5929 fprintf (dump_file, "\n<Invariant Vars>:\n");
5930 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5932 struct version_info *info = ver_info (data, i);
5933 if (info->inv_id)
5935 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5936 print_generic_expr (dump_file, info->name, TDF_SLIM);
5937 fprintf (dump_file, "%s\n",
5938 info->has_nonlin_use ? "" : "\t(eliminable)");
5942 /* Dump invariant expressions. */
5943 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5944 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5946 for (hash_table<iv_inv_expr_hasher>::iterator it
5947 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5948 ++it)
5949 list.safe_push (*it);
5951 list.qsort (sort_iv_inv_expr_ent);
5953 for (i = 0; i < list.length (); ++i)
5955 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5956 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5957 fprintf (dump_file, "\n");
5960 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5962 for (i = 0; i < data->vgroups.length (); i++)
5964 group = data->vgroups[i];
5966 fprintf (dump_file, "Group %d:\n", i);
5967 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5968 for (j = 0; j < group->n_map_members; j++)
5970 if (!group->cost_map[j].cand
5971 || group->cost_map[j].cost.infinite_cost_p ())
5972 continue;
5974 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5975 group->cost_map[j].cand->id,
5976 group->cost_map[j].cost.cost,
5977 group->cost_map[j].cost.complexity);
5978 if (!group->cost_map[j].inv_exprs
5979 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5980 fprintf (dump_file, "NIL;\t");
5981 else
5982 bitmap_print (dump_file,
5983 group->cost_map[j].inv_exprs, "", ";\t");
5984 if (!group->cost_map[j].inv_vars
5985 || bitmap_empty_p (group->cost_map[j].inv_vars))
5986 fprintf (dump_file, "NIL;\n");
5987 else
5988 bitmap_print (dump_file,
5989 group->cost_map[j].inv_vars, "", "\n");
5992 fprintf (dump_file, "\n");
5994 fprintf (dump_file, "\n");
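/* With -fdump-tree-ivopts-details the table printed above looks roughly like
   this (values are illustrative only):

     Group 0:
       cand	cost	compl.	inv.expr.	inv.vars
       0	4	0	NIL;	NIL;
       5	8	1	NIL;	NIL;

   where non-empty inv.expr./inv.vars columns show the bitmap of invariant
   ids the pair depends on instead of NIL.  */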
5998 /* Determines cost of the candidate CAND. */
6000 static void
6001 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6003 comp_cost cost_base;
6004 int64_t cost, cost_step;
6005 tree base;
6007 gcc_assert (cand->iv != NULL);
6009 /* There are two costs associated with the candidate -- its increment
6010 and its initialization. The second is almost negligible for any loop
6011 that rolls enough, so we give it only very little weight. */
6013 base = cand->iv->base;
6014 cost_base = force_var_cost (data, base, NULL);
6015 /* It will be exceptional that the iv register happens to be initialized with
6016 the proper value at no cost. In general, there will at least be a regcopy
6017 or a const set. */
6018 if (cost_base.cost == 0)
6019 cost_base.cost = COSTS_N_INSNS (1);
6020 /* Doloop decrement should be considered as zero cost. */
6021 if (cand->doloop_p)
6022 cost_step = 0;
6023 else
6024 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6025 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6027 /* Prefer the original ivs unless we may gain something by replacing them.
6028 The reason is to make debugging simpler; so this is not relevant for
6029 artificial ivs created by other optimization passes. */
6030 if ((cand->pos != IP_ORIGINAL
6031 || !SSA_NAME_VAR (cand->var_before)
6032 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6033 /* Prefer doloop as well. */
6034 && !cand->doloop_p)
6035 cost++;
6037 /* Prefer not to insert statements into the latch unless there are some
6038 already (so that we do not create unnecessary jumps). */
6039 if (cand->pos == IP_END
6040 && empty_block_p (ip_end_pos (data->current_loop)))
6041 cost++;
6043 cand->cost = cost;
6044 cand->cost_step = cost_step;
6047 /* Determines costs of computation of the candidates. */
6049 static void
6050 determine_iv_costs (struct ivopts_data *data)
6052 unsigned i;
6054 if (dump_file && (dump_flags & TDF_DETAILS))
6056 fprintf (dump_file, "<Candidate Costs>:\n");
6057 fprintf (dump_file, " cand\tcost\n");
6060 for (i = 0; i < data->vcands.length (); i++)
6062 struct iv_cand *cand = data->vcands[i];
6064 determine_iv_cost (data, cand);
6066 if (dump_file && (dump_flags & TDF_DETAILS))
6067 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6070 if (dump_file && (dump_flags & TDF_DETAILS))
6071 fprintf (dump_file, "\n");
6074 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6075 induction variables. Note N_INVS includes both invariant variables and
6076 invariant expressions. */
6078 static unsigned
6079 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6080 unsigned n_cands)
6082 unsigned cost;
6083 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6084 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6085 bool speed = data->speed;
6087 /* If there is a call in the loop body, the call-clobbered registers
6088 are not available for loop invariants. */
6089 if (data->body_includes_call)
6090 available_regs = available_regs - target_clobbered_regs;
6092 /* If we have enough registers. */
6093 if (regs_needed + target_res_regs < available_regs)
6094 cost = n_new;
6095 /* If close to running out of registers, try to preserve them. */
6096 else if (regs_needed <= available_regs)
6097 cost = target_reg_cost [speed] * regs_needed;
6098 /* If the registers needed exceed the available ones but the number of
6099 candidates does not, penalize the extra registers using target_spill_cost. */
6100 else if (n_cands <= available_regs)
6101 cost = target_reg_cost [speed] * available_regs
6102 + target_spill_cost [speed] * (regs_needed - available_regs);
6103 /* If even the number of candidates exceeds the available registers, penalize
6104 the extra candidate registers using target_spill_cost * 2, because it is
6105 more expensive to spill an induction variable than an invariant. */
6106 else
6107 cost = target_reg_cost [speed] * available_regs
6108 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6109 + target_spill_cost [speed] * (regs_needed - n_cands);
6111 /* Finally, add the number of candidates, so that we prefer eliminating
6112 induction variables if possible. */
6113 return cost + n_cands;
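/* A worked example of the formula above, under assumed (not target-derived)
   numbers: n_old = 4 registers already used, n_invs = 2, n_cands = 3, so
   regs_needed = 9.  With available_regs = 10 and target_res_regs = 3,
   9 + 3 < 10 fails but 9 <= 10 holds, so cost = target_reg_cost * 9.
   With only available_regs = 6, n_cands = 3 <= 6 holds, so
   cost = target_reg_cost * 6 + target_spill_cost * (9 - 6).
   In both cases n_cands is added at the end, so sets with fewer ivs are
   preferred when the other costs tie.  */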
6116 /* For each size of the induction variable set determine the penalty. */
6118 static void
6119 determine_set_costs (struct ivopts_data *data)
6121 unsigned j, n;
6122 gphi *phi;
6123 gphi_iterator psi;
6124 tree op;
6125 class loop *loop = data->current_loop;
6126 bitmap_iterator bi;
6128 if (dump_file && (dump_flags & TDF_DETAILS))
6130 fprintf (dump_file, "<Global Costs>:\n");
6131 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6132 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6133 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6134 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6137 n = 0;
6138 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6140 phi = psi.phi ();
6141 op = PHI_RESULT (phi);
6143 if (virtual_operand_p (op))
6144 continue;
6146 if (get_iv (data, op))
6147 continue;
6149 if (!POINTER_TYPE_P (TREE_TYPE (op))
6150 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6151 continue;
6153 n++;
6156 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6158 struct version_info *info = ver_info (data, j);
6160 if (info->inv_id && info->has_nonlin_use)
6161 n++;
6164 data->regs_used = n;
6165 if (dump_file && (dump_flags & TDF_DETAILS))
6166 fprintf (dump_file, " regs_used %d\n", n);
6168 if (dump_file && (dump_flags & TDF_DETAILS))
6170 fprintf (dump_file, " cost for size:\n");
6171 fprintf (dump_file, " ivs\tcost\n");
6172 for (j = 0; j <= 2 * target_avail_regs; j++)
6173 fprintf (dump_file, " %d\t%d\n", j,
6174 ivopts_estimate_reg_pressure (data, 0, j));
6175 fprintf (dump_file, "\n");
6179 /* Returns true if A is a cheaper cost pair than B. */
6181 static bool
6182 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6184 if (!a)
6185 return false;
6187 if (!b)
6188 return true;
6190 if (a->cost < b->cost)
6191 return true;
6193 if (b->cost < a->cost)
6194 return false;
6196 /* In case the costs are the same, prefer the cheaper candidate. */
6197 if (a->cand->cost < b->cand->cost)
6198 return true;
6200 return false;
6203 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6204 for more expensive, equal and cheaper respectively. */
6206 static int
6207 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6209 if (cheaper_cost_pair (a, b))
6210 return -1;
6211 if (cheaper_cost_pair (b, a))
6212 return 1;
6214 return 0;
6217 /* Returns the cost pair (candidate) by which GROUP is expressed in IVS. */
6219 static class cost_pair *
6220 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6222 return ivs->cand_for_group[group->id];
6225 /* Computes the cost field of IVS structure. */
6227 static void
6228 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6230 comp_cost cost = ivs->cand_use_cost;
6232 cost += ivs->cand_cost;
6233 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6234 ivs->cost = cost;
6237 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6238 and IVS. */
6240 static void
6241 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6243 bitmap_iterator bi;
6244 unsigned iid;
6246 if (!invs)
6247 return;
6249 gcc_assert (n_inv_uses != NULL);
6250 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6252 n_inv_uses[iid]--;
6253 if (n_inv_uses[iid] == 0)
6254 ivs->n_invs--;
6258 /* Set GROUP not to be expressed by any candidate in IVS. */
6260 static void
6261 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6262 struct iv_group *group)
6264 unsigned gid = group->id, cid;
6265 class cost_pair *cp;
6267 cp = ivs->cand_for_group[gid];
6268 if (!cp)
6269 return;
6270 cid = cp->cand->id;
6272 ivs->bad_groups++;
6273 ivs->cand_for_group[gid] = NULL;
6274 ivs->n_cand_uses[cid]--;
6276 if (ivs->n_cand_uses[cid] == 0)
6278 bitmap_clear_bit (ivs->cands, cid);
6279 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6280 ivs->n_cands--;
6281 ivs->cand_cost -= cp->cand->cost;
6282 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6283 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6286 ivs->cand_use_cost -= cp->cost;
6287 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6288 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6289 iv_ca_recount_cost (data, ivs);
6292 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6293 IVS. */
6295 static void
6296 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6298 bitmap_iterator bi;
6299 unsigned iid;
6301 if (!invs)
6302 return;
6304 gcc_assert (n_inv_uses != NULL);
6305 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6307 n_inv_uses[iid]++;
6308 if (n_inv_uses[iid] == 1)
6309 ivs->n_invs++;
6313 /* Set cost pair for GROUP in set IVS to CP. */
6315 static void
6316 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6317 struct iv_group *group, class cost_pair *cp)
6319 unsigned gid = group->id, cid;
6321 if (ivs->cand_for_group[gid] == cp)
6322 return;
6324 if (ivs->cand_for_group[gid])
6325 iv_ca_set_no_cp (data, ivs, group);
6327 if (cp)
6329 cid = cp->cand->id;
6331 ivs->bad_groups--;
6332 ivs->cand_for_group[gid] = cp;
6333 ivs->n_cand_uses[cid]++;
6334 if (ivs->n_cand_uses[cid] == 1)
6336 bitmap_set_bit (ivs->cands, cid);
6337 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6338 ivs->n_cands++;
6339 ivs->cand_cost += cp->cand->cost;
6340 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6341 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6344 ivs->cand_use_cost += cp->cost;
6345 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6346 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6347 iv_ca_recount_cost (data, ivs);
6351 /* Extend set IVS by expressing GROUP by some of the candidates in it
6352 if possible. Consider all important candidates if the candidates in
6353 set IVS don't give any result. */
6355 static void
6356 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6357 struct iv_group *group)
6359 class cost_pair *best_cp = NULL, *cp;
6360 bitmap_iterator bi;
6361 unsigned i;
6362 struct iv_cand *cand;
6364 gcc_assert (ivs->upto >= group->id);
6365 ivs->upto++;
6366 ivs->bad_groups++;
6368 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6370 cand = data->vcands[i];
6371 cp = get_group_iv_cost (data, group, cand);
6372 if (cheaper_cost_pair (cp, best_cp))
6373 best_cp = cp;
6376 if (best_cp == NULL)
6378 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6380 cand = data->vcands[i];
6381 cp = get_group_iv_cost (data, group, cand);
6382 if (cheaper_cost_pair (cp, best_cp))
6383 best_cp = cp;
6387 iv_ca_set_cp (data, ivs, group, best_cp);
6390 /* Get cost for assignment IVS. */
6392 static comp_cost
6393 iv_ca_cost (class iv_ca *ivs)
6395 /* This was a conditional expression but it triggered a bug in
6396 Sun C 5.5. */
6397 if (ivs->bad_groups)
6398 return infinite_cost;
6399 else
6400 return ivs->cost;
6403 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6404 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6405 respectively. */
6407 static int
6408 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6409 struct iv_group *group, class cost_pair *old_cp,
6410 class cost_pair *new_cp)
6412 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6413 unsigned old_n_invs = ivs->n_invs;
6414 iv_ca_set_cp (data, ivs, group, new_cp);
6415 unsigned new_n_invs = ivs->n_invs;
6416 iv_ca_set_cp (data, ivs, group, old_cp);
6418 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6421 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6422 it before NEXT. */
6424 static struct iv_ca_delta *
6425 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6426 class cost_pair *new_cp, struct iv_ca_delta *next)
6428 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6430 change->group = group;
6431 change->old_cp = old_cp;
6432 change->new_cp = new_cp;
6433 change->next = next;
6435 return change;
6438 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6439 are rewritten. */
6441 static struct iv_ca_delta *
6442 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6444 struct iv_ca_delta *last;
6446 if (!l2)
6447 return l1;
6449 if (!l1)
6450 return l2;
6452 for (last = l1; last->next; last = last->next)
6453 continue;
6454 last->next = l2;
6456 return l1;
6459 /* Reverse the list of changes DELTA, forming the inverse to it. */
6461 static struct iv_ca_delta *
6462 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6464 struct iv_ca_delta *act, *next, *prev = NULL;
6466 for (act = delta; act; act = next)
6468 next = act->next;
6469 act->next = prev;
6470 prev = act;
6472 std::swap (act->old_cp, act->new_cp);
6475 return prev;
6478 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6479 reverted instead. */
6481 static void
6482 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6483 struct iv_ca_delta *delta, bool forward)
6485 class cost_pair *from, *to;
6486 struct iv_ca_delta *act;
6488 if (!forward)
6489 delta = iv_ca_delta_reverse (delta);
6491 for (act = delta; act; act = act->next)
6493 from = act->old_cp;
6494 to = act->new_cp;
6495 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6496 iv_ca_set_cp (data, ivs, act->group, to);
6499 if (!forward)
6500 iv_ca_delta_reverse (delta);
6503 /* Returns true if CAND is used in IVS. */
6505 static bool
6506 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6508 return ivs->n_cand_uses[cand->id] > 0;
6511 /* Returns number of induction variable candidates in the set IVS. */
6513 static unsigned
6514 iv_ca_n_cands (class iv_ca *ivs)
6516 return ivs->n_cands;
6519 /* Free the list of changes DELTA. */
6521 static void
6522 iv_ca_delta_free (struct iv_ca_delta **delta)
6524 struct iv_ca_delta *act, *next;
6526 for (act = *delta; act; act = next)
6528 next = act->next;
6529 free (act);
6532 *delta = NULL;
6535 /* Allocates new iv candidates assignment. */
6537 static class iv_ca *
6538 iv_ca_new (struct ivopts_data *data)
6540 class iv_ca *nw = XNEW (class iv_ca);
6542 nw->upto = 0;
6543 nw->bad_groups = 0;
6544 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6545 data->vgroups.length ());
6546 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6547 nw->cands = BITMAP_ALLOC (NULL);
6548 nw->n_cands = 0;
6549 nw->n_invs = 0;
6550 nw->cand_use_cost = no_cost;
6551 nw->cand_cost = 0;
6552 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6553 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6554 nw->cost = no_cost;
6556 return nw;
6559 /* Free memory occupied by the set IVS. */
6561 static void
6562 iv_ca_free (class iv_ca **ivs)
6564 free ((*ivs)->cand_for_group);
6565 free ((*ivs)->n_cand_uses);
6566 BITMAP_FREE ((*ivs)->cands);
6567 free ((*ivs)->n_inv_var_uses);
6568 free ((*ivs)->n_inv_expr_uses);
6569 free (*ivs);
6570 *ivs = NULL;
6573 /* Dumps IVS to FILE. */
6575 static void
6576 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6578 unsigned i;
6579 comp_cost cost = iv_ca_cost (ivs);
6581 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6582 cost.complexity);
6583 fprintf (file, " reg_cost: %d\n",
6584 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6585 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6586 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6587 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6588 bitmap_print (file, ivs->cands, " candidates: ","\n");
6590 for (i = 0; i < ivs->upto; i++)
6592 struct iv_group *group = data->vgroups[i];
6593 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6594 if (cp)
6595 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6596 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6597 cp->cost.cost, cp->cost.complexity);
6598 else
6599 fprintf (file, " group:%d --> ??\n", group->id);
6602 const char *pref = "";
6603 fprintf (file, " invariant variables: ");
6604 for (i = 1; i <= data->max_inv_var_id; i++)
6605 if (ivs->n_inv_var_uses[i])
6607 fprintf (file, "%s%d", pref, i);
6608 pref = ", ";
6611 pref = "";
6612 fprintf (file, "\n invariant expressions: ");
6613 for (i = 1; i <= data->max_inv_expr_id; i++)
6614 if (ivs->n_inv_expr_uses[i])
6616 fprintf (file, "%s%d", pref, i);
6617 pref = ", ";
6620 fprintf (file, "\n\n");
6623 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6624 the new set, and store the differences in DELTA. The number of induction
6625 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it
6626 is true the function tries to find a solution with minimal iv candidates. */
6628 static comp_cost
6629 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6630 struct iv_cand *cand, struct iv_ca_delta **delta,
6631 unsigned *n_ivs, bool min_ncand)
6633 unsigned i;
6634 comp_cost cost;
6635 struct iv_group *group;
6636 class cost_pair *old_cp, *new_cp;
6638 *delta = NULL;
6639 for (i = 0; i < ivs->upto; i++)
6641 group = data->vgroups[i];
6642 old_cp = iv_ca_cand_for_group (ivs, group);
6644 if (old_cp
6645 && old_cp->cand == cand)
6646 continue;
6648 new_cp = get_group_iv_cost (data, group, cand);
6649 if (!new_cp)
6650 continue;
6652 if (!min_ncand)
6654 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6655 /* Skip if new_cp depends on more invariants. */
6656 if (cmp_invs > 0)
6657 continue;
6659 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6660 /* Skip if new_cp is not cheaper. */
6661 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6662 continue;
6665 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6668 iv_ca_delta_commit (data, ivs, *delta, true);
6669 cost = iv_ca_cost (ivs);
6670 if (n_ivs)
6671 *n_ivs = iv_ca_n_cands (ivs);
6672 iv_ca_delta_commit (data, ivs, *delta, false);
6674 return cost;
6677 /* Try narrowing set IVS by removing CAND. Return the cost of
6678 the new set and store the differences in DELTA. START is
6679 the candidate with which we start narrowing. */
6681 static comp_cost
6682 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6683 struct iv_cand *cand, struct iv_cand *start,
6684 struct iv_ca_delta **delta)
6686 unsigned i, ci;
6687 struct iv_group *group;
6688 class cost_pair *old_cp, *new_cp, *cp;
6689 bitmap_iterator bi;
6690 struct iv_cand *cnd;
6691 comp_cost cost, best_cost, acost;
6693 *delta = NULL;
6694 for (i = 0; i < data->vgroups.length (); i++)
6696 group = data->vgroups[i];
6698 old_cp = iv_ca_cand_for_group (ivs, group);
6699 if (old_cp->cand != cand)
6700 continue;
6702 best_cost = iv_ca_cost (ivs);
6703 /* Start narrowing with START. */
6704 new_cp = get_group_iv_cost (data, group, start);
6706 if (data->consider_all_candidates)
6708 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6710 if (ci == cand->id || (start && ci == start->id))
6711 continue;
6713 cnd = data->vcands[ci];
6715 cp = get_group_iv_cost (data, group, cnd);
6716 if (!cp)
6717 continue;
6719 iv_ca_set_cp (data, ivs, group, cp);
6720 acost = iv_ca_cost (ivs);
6722 if (acost < best_cost)
6724 best_cost = acost;
6725 new_cp = cp;
6729 else
6731 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6733 if (ci == cand->id || (start && ci == start->id))
6734 continue;
6736 cnd = data->vcands[ci];
6738 cp = get_group_iv_cost (data, group, cnd);
6739 if (!cp)
6740 continue;
6742 iv_ca_set_cp (data, ivs, group, cp);
6743 acost = iv_ca_cost (ivs);
6745 if (acost < best_cost)
6747 best_cost = acost;
6748 new_cp = cp;
6752 /* Restore to old cp for use. */
6753 iv_ca_set_cp (data, ivs, group, old_cp);
6755 if (!new_cp)
6757 iv_ca_delta_free (delta);
6758 return infinite_cost;
6761 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6764 iv_ca_delta_commit (data, ivs, *delta, true);
6765 cost = iv_ca_cost (ivs);
6766 iv_ca_delta_commit (data, ivs, *delta, false);
6768 return cost;
6771 /* Try optimizing the set of candidates IVS by removing candidates other
6772 than EXCEPT_CAND from it. Return the cost of the new set, and store
6773 differences in DELTA. */
6775 static comp_cost
6776 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6777 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6779 bitmap_iterator bi;
6780 struct iv_ca_delta *act_delta, *best_delta;
6781 unsigned i;
6782 comp_cost best_cost, acost;
6783 struct iv_cand *cand;
6785 best_delta = NULL;
6786 best_cost = iv_ca_cost (ivs);
6788 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6790 cand = data->vcands[i];
6792 if (cand == except_cand)
6793 continue;
6795 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6797 if (acost < best_cost)
6799 best_cost = acost;
6800 iv_ca_delta_free (&best_delta);
6801 best_delta = act_delta;
6803 else
6804 iv_ca_delta_free (&act_delta);
6807 if (!best_delta)
6809 *delta = NULL;
6810 return best_cost;
6813 /* Recurse to possibly remove other unnecessary ivs. */
6814 iv_ca_delta_commit (data, ivs, best_delta, true);
6815 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6816 iv_ca_delta_commit (data, ivs, best_delta, false);
6817 *delta = iv_ca_delta_join (best_delta, *delta);
6818 return best_cost;
6821 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6822 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6823 the corresponding cost_pair; otherwise just return BEST_CP. */
6825 static class cost_pair*
6826 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6827 unsigned int cand_idx, struct iv_cand *old_cand,
6828 class cost_pair *best_cp)
6830 struct iv_cand *cand;
6831 class cost_pair *cp;
6833 gcc_assert (old_cand != NULL && best_cp != NULL);
6834 if (cand_idx == old_cand->id)
6835 return best_cp;
6837 cand = data->vcands[cand_idx];
6838 cp = get_group_iv_cost (data, group, cand);
6839 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6840 return cp;
6842 return best_cp;
6845 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6846 that are used by more than one iv use. For each of those candidates,
6847 this function tries to represent the iv uses under that candidate using
6848 other candidates with lower local cost, then tries to prune the new set.
6849 If the new set has a lower cost, it returns the new cost after recording the
6850 candidate replacement in list DELTA. */
6852 static comp_cost
6853 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6854 struct iv_ca_delta **delta)
6856 bitmap_iterator bi, bj;
6857 unsigned int i, j, k;
6858 struct iv_cand *cand;
6859 comp_cost orig_cost, acost;
6860 struct iv_ca_delta *act_delta, *tmp_delta;
6861 class cost_pair *old_cp, *best_cp = NULL;
6863 *delta = NULL;
6864 orig_cost = iv_ca_cost (ivs);
6866 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6868 if (ivs->n_cand_uses[i] == 1
6869 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6870 continue;
6872 cand = data->vcands[i];
6874 act_delta = NULL;
6875 /* Represent uses under current candidate using other ones with
6876 lower local cost. */
6877 for (j = 0; j < ivs->upto; j++)
6879 struct iv_group *group = data->vgroups[j];
6880 old_cp = iv_ca_cand_for_group (ivs, group);
6882 if (old_cp->cand != cand)
6883 continue;
6885 best_cp = old_cp;
6886 if (data->consider_all_candidates)
6887 for (k = 0; k < data->vcands.length (); k++)
6888 best_cp = cheaper_cost_with_cand (data, group, k,
6889 old_cp->cand, best_cp);
6890 else
6891 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6892 best_cp = cheaper_cost_with_cand (data, group, k,
6893 old_cp->cand, best_cp);
6895 if (best_cp == old_cp)
6896 continue;
6898 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6900 /* No need for further pruning. */
6901 if (!act_delta)
6902 continue;
6904 /* Prune the new candidate set. */
6905 iv_ca_delta_commit (data, ivs, act_delta, true);
6906 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6907 iv_ca_delta_commit (data, ivs, act_delta, false);
6908 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6910 if (acost < orig_cost)
6912 *delta = act_delta;
6913 return acost;
6915 else
6916 iv_ca_delta_free (&act_delta);
6919 return orig_cost;
6922 /* Tries to extend the set IVS in the best possible way in order to
6923 express GROUP. If ORIGINALP is true, prefer candidates from
6924 the original set of IVs, otherwise favor important candidates not
6925 based on any memory object. */
6927 static bool
6928 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6929 struct iv_group *group, bool originalp)
6931 comp_cost best_cost, act_cost;
6932 unsigned i;
6933 bitmap_iterator bi;
6934 struct iv_cand *cand;
6935 struct iv_ca_delta *best_delta = NULL, *act_delta;
6936 class cost_pair *cp;
6938 iv_ca_add_group (data, ivs, group);
6939 best_cost = iv_ca_cost (ivs);
6940 cp = iv_ca_cand_for_group (ivs, group);
6941 if (cp)
6943 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6944 iv_ca_set_no_cp (data, ivs, group);
6947 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6948 first try important candidates not based on any memory object. Only if
6949 this fails, try the specific ones. Rationale -- in loops with many
6950 variables the best choice often is to use just one generic biv. If we
6951 added here many ivs specific to the uses, the optimization algorithm later
6952 would be likely to get stuck in a local minimum, thus causing us to create
6953 too many ivs. The approach from few ivs to more seems more likely to be
6954 successful -- starting from few ivs, replacing an expensive use by a
6955 specific iv should always be a win. */
6956 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6958 cand = data->vcands[i];
6960 if (originalp && cand->pos != IP_ORIGINAL)
6961 continue;
6963 if (!originalp && cand->iv->base_object != NULL_TREE)
6964 continue;
6966 if (iv_ca_cand_used_p (ivs, cand))
6967 continue;
6969 cp = get_group_iv_cost (data, group, cand);
6970 if (!cp)
6971 continue;
6973 iv_ca_set_cp (data, ivs, group, cp);
6974 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6975 true);
6976 iv_ca_set_no_cp (data, ivs, group);
6977 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6979 if (act_cost < best_cost)
6981 best_cost = act_cost;
6983 iv_ca_delta_free (&best_delta);
6984 best_delta = act_delta;
6986 else
6987 iv_ca_delta_free (&act_delta);
6990 if (best_cost.infinite_cost_p ())
6992 for (i = 0; i < group->n_map_members; i++)
6994 cp = group->cost_map + i;
6995 cand = cp->cand;
6996 if (!cand)
6997 continue;
6999 /* Already tried this. */
7000 if (cand->important)
7002 if (originalp && cand->pos == IP_ORIGINAL)
7003 continue;
7004 if (!originalp && cand->iv->base_object == NULL_TREE)
7005 continue;
7008 if (iv_ca_cand_used_p (ivs, cand))
7009 continue;
7011 act_delta = NULL;
7012 iv_ca_set_cp (data, ivs, group, cp);
7013 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7014 iv_ca_set_no_cp (data, ivs, group);
7015 act_delta = iv_ca_delta_add (group,
7016 iv_ca_cand_for_group (ivs, group),
7017 cp, act_delta);
7019 if (act_cost < best_cost)
7021 best_cost = act_cost;
7023 if (best_delta)
7024 iv_ca_delta_free (&best_delta);
7025 best_delta = act_delta;
7027 else
7028 iv_ca_delta_free (&act_delta);
7032 iv_ca_delta_commit (data, ivs, best_delta, true);
7033 iv_ca_delta_free (&best_delta);
7035 return !best_cost.infinite_cost_p ();
7038 /* Finds an initial assignment of candidates to uses. */
7040 static class iv_ca *
7041 get_initial_solution (struct ivopts_data *data, bool originalp)
7043 unsigned i;
7044 class iv_ca *ivs = iv_ca_new (data);
7046 for (i = 0; i < data->vgroups.length (); i++)
7047 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7049 iv_ca_free (&ivs);
7050 return NULL;
7053 return ivs;
7056 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
7057 points to a bool variable; if it is true, this function tries to break
7058 the locally optimal fixed point by replacing candidates in IVS. */
7060 static bool
7061 try_improve_iv_set (struct ivopts_data *data,
7062 class iv_ca *ivs, bool *try_replace_p)
7064 unsigned i, n_ivs;
7065 comp_cost acost, best_cost = iv_ca_cost (ivs);
7066 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7067 struct iv_cand *cand;
7069 /* Try extending the set of induction variables by one. */
7070 for (i = 0; i < data->vcands.length (); i++)
7072 cand = data->vcands[i];
7074 if (iv_ca_cand_used_p (ivs, cand))
7075 continue;
7077 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7078 if (!act_delta)
7079 continue;
7081 /* If we successfully added the candidate and the set is small enough,
7082 try optimizing it by removing other candidates. */
7083 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7085 iv_ca_delta_commit (data, ivs, act_delta, true);
7086 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7087 iv_ca_delta_commit (data, ivs, act_delta, false);
7088 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7091 if (acost < best_cost)
7093 best_cost = acost;
7094 iv_ca_delta_free (&best_delta);
7095 best_delta = act_delta;
7097 else
7098 iv_ca_delta_free (&act_delta);
7101 if (!best_delta)
7103 /* Try removing the candidates from the set instead. */
7104 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7106 if (!best_delta && *try_replace_p)
7108 *try_replace_p = false;
7109 /* So far the candidate-selection algorithm tends to choose fewer IVs
7110 so that it can handle loops that have many variables but for which
7111 the best choice is often to use only one general biv. One
7112 weakness is that it can't handle the opposite case, in which different
7113 candidates should be chosen with respect to each use. To solve
7114 this problem, we replace candidates in the manner described in the
7115 comments of iv_ca_replace, giving the general algorithm a chance
7116 to break the locally optimal fixed point in these cases. */
7117 best_cost = iv_ca_replace (data, ivs, &best_delta);
7120 if (!best_delta)
7121 return false;
7124 iv_ca_delta_commit (data, ivs, best_delta, true);
7125 iv_ca_delta_free (&best_delta);
7126 return best_cost == iv_ca_cost (ivs);
7129 /* Attempts to find the optimal set of induction variables. We use a simple
7130 greedy heuristic -- we try to replace at most one candidate in the selected
7131 solution and remove the unused ivs while this improves the cost. */
7133 static class iv_ca *
7134 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7136 class iv_ca *set;
7137 bool try_replace_p = true;
7139 /* Get the initial solution. */
7140 set = get_initial_solution (data, originalp);
7141 if (!set)
7143 if (dump_file && (dump_flags & TDF_DETAILS))
7144 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7145 return NULL;
7148 if (dump_file && (dump_flags & TDF_DETAILS))
7150 fprintf (dump_file, "Initial set of candidates:\n");
7151 iv_ca_dump (data, dump_file, set);
7154 while (try_improve_iv_set (data, set, &try_replace_p))
7156 if (dump_file && (dump_flags & TDF_DETAILS))
7158 fprintf (dump_file, "Improved to:\n");
7159 iv_ca_dump (data, dump_file, set);
7163 /* If the set has infinite_cost, it can't be optimal. */
7164 if (iv_ca_cost (set).infinite_cost_p ())
7166 if (dump_file && (dump_flags & TDF_DETAILS))
7167 fprintf (dump_file,
7168 "Overflow to infinite cost in try_improve_iv_set.\n");
7169 iv_ca_free (&set);
7171 return set;
7174 static class iv_ca *
7175 find_optimal_iv_set (struct ivopts_data *data)
7177 unsigned i;
7178 comp_cost cost, origcost;
7179 class iv_ca *set, *origset;
7181 /* Determine the cost based on a strategy that starts with original IVs,
7182 and try again using a strategy that prefers candidates not based
7183 on any memory object. */
7184 origset = find_optimal_iv_set_1 (data, true);
7185 set = find_optimal_iv_set_1 (data, false);
7187 if (!origset && !set)
7188 return NULL;
7190 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7191 cost = set ? iv_ca_cost (set) : infinite_cost;
7193 if (dump_file && (dump_flags & TDF_DETAILS))
7195 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7196 origcost.cost, origcost.complexity);
7197 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7198 cost.cost, cost.complexity);
7201 /* Choose the one with the best cost. */
7202 if (origcost <= cost)
7204 if (set)
7205 iv_ca_free (&set);
7206 set = origset;
7208 else if (origset)
7209 iv_ca_free (&origset);
7211 for (i = 0; i < data->vgroups.length (); i++)
7213 struct iv_group *group = data->vgroups[i];
7214 group->selected = iv_ca_cand_for_group (set, group)->cand;
7217 return set;
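/* A hedged pseudocode summary of the search implemented above (illustrative
   only; the real control flow lives in the functions it names):

     for originalp in { true, false }:
       set = get_initial_solution (data, originalp);   // one cost pair per group
       while (try_improve_iv_set (data, set, &try_replace_p))
         ;   // extend by one candidate, prune, or replace candidates
     return the cheaper of the two resulting sets.  */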
7220 /* Creates a new induction variable corresponding to CAND. */
7222 static void
7223 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7225 gimple_stmt_iterator incr_pos;
7226 tree base;
7227 struct iv_use *use;
7228 struct iv_group *group;
7229 bool after = false;
7231 gcc_assert (cand->iv != NULL);
7233 switch (cand->pos)
7235 case IP_NORMAL:
7236 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7237 break;
7239 case IP_END:
7240 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7241 after = true;
7242 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7244 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7245 incr_pos = gsi_after_labels (split_edge (e));
7246 after = false;
7248 break;
7250 case IP_AFTER_USE:
7251 after = true;
7252 /* fall through */
7253 case IP_BEFORE_USE:
7254 incr_pos = gsi_for_stmt (cand->incremented_at);
7255 break;
7257 case IP_ORIGINAL:
7258 /* Mark that the iv is preserved. */
7259 name_info (data, cand->var_before)->preserve_biv = true;
7260 name_info (data, cand->var_after)->preserve_biv = true;
7262 /* Rewrite the increment so that it uses var_before directly. */
7263 use = find_interesting_uses_op (data, cand->var_after);
7264 group = data->vgroups[use->group_id];
7265 group->selected = cand;
7266 return;
7269 gimple_add_tmp_var (cand->var_before);
7271 base = unshare_expr (cand->iv->base);
7273 create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7274 cand->var_before, data->current_loop,
7275 &incr_pos, after, &cand->var_before, &cand->var_after);
7278 /* Creates new induction variables described in SET. */
7280 static void
7281 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7283 unsigned i;
7284 struct iv_cand *cand;
7285 bitmap_iterator bi;
7287 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7289 cand = data->vcands[i];
7290 create_new_iv (data, cand);
7293 if (dump_file && (dump_flags & TDF_DETAILS))
7295 fprintf (dump_file, "Selected IV set for loop %d",
7296 data->current_loop->num);
7297 if (data->loop_loc != UNKNOWN_LOCATION)
7298 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7299 LOCATION_LINE (data->loop_loc));
7300 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7301 avg_loop_niter (data->current_loop));
7302 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7303 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7305 cand = data->vcands[i];
7306 dump_cand (dump_file, cand);
7308 fprintf (dump_file, "\n");
7312 /* Rewrites USE (definition of iv used in a nonlinear expression)
7313 using candidate CAND. */
7315 static void
7316 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7317 struct iv_use *use, struct iv_cand *cand)
7319 gassign *ass;
7320 gimple_stmt_iterator bsi;
7321 tree comp, type = get_use_type (use), tgt;
7323 /* An important special case -- if we are asked to express value of
7324 the original iv by itself, just exit; there is no need to
7325 introduce a new computation (that might also need casting the
7326 variable to unsigned and back). */
7327 if (cand->pos == IP_ORIGINAL
7328 && cand->incremented_at == use->stmt)
7330 tree op = NULL_TREE;
7331 enum tree_code stmt_code;
7333 gcc_assert (is_gimple_assign (use->stmt));
7334 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7336 /* Check whether we may leave the computation unchanged.
7337 This is the case only if it does not rely on other
7338 computations in the loop -- otherwise, the computation
7339 we rely upon may be removed in remove_unused_ivs,
7340 thus leading to an ICE. */
7341 stmt_code = gimple_assign_rhs_code (use->stmt);
7342 if (stmt_code == PLUS_EXPR
7343 || stmt_code == MINUS_EXPR
7344 || stmt_code == POINTER_PLUS_EXPR)
7346 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7347 op = gimple_assign_rhs2 (use->stmt);
7348 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7349 op = gimple_assign_rhs1 (use->stmt);
7352 if (op != NULL_TREE)
7354 if (expr_invariant_in_loop_p (data->current_loop, op))
7355 return;
7356 if (TREE_CODE (op) == SSA_NAME)
7358 struct iv *iv = get_iv (data, op);
7359 if (iv != NULL && integer_zerop (iv->step))
7360 return;
7365 switch (gimple_code (use->stmt))
7367 case GIMPLE_PHI:
7368 tgt = PHI_RESULT (use->stmt);
7370 /* If we should keep the biv, do not replace it. */
7371 if (name_info (data, tgt)->preserve_biv)
7372 return;
7374 bsi = gsi_after_labels (gimple_bb (use->stmt));
7375 break;
7377 case GIMPLE_ASSIGN:
7378 tgt = gimple_assign_lhs (use->stmt);
7379 bsi = gsi_for_stmt (use->stmt);
7380 break;
7382 default:
7383 gcc_unreachable ();
7386 aff_tree aff_inv, aff_var;
7387 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7388 use, cand, &aff_inv, &aff_var))
7389 gcc_unreachable ();
7391 unshare_aff_combination (&aff_inv);
7392 unshare_aff_combination (&aff_var);
7393 /* Prefer a CSE opportunity over a loop invariant by adding the offset last,
7394 so that iv_uses that differ only in their offsets can be CSEed. */
7395 poly_widest_int offset = aff_inv.offset;
7396 aff_inv.offset = 0;
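/* E.g. (illustrative): for uses like a[i + 4] and a[i + 8] the candidate
   and invariant parts are identical and only the offsets 4 and 8 differ;
   adding the offset last lets the shared part be CSEd.  */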
7398 gimple_seq stmt_list = NULL, seq = NULL;
7399 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7400 tree comp_op2 = aff_combination_to_tree (&aff_var);
7401 gcc_assert (comp_op1 && comp_op2);
7403 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7404 gimple_seq_add_seq (&stmt_list, seq);
7405 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7406 gimple_seq_add_seq (&stmt_list, seq);
7408 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7409 std::swap (comp_op1, comp_op2);
7411 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7413 comp = fold_build_pointer_plus (comp_op1,
7414 fold_convert (sizetype, comp_op2));
7415 comp = fold_build_pointer_plus (comp,
7416 wide_int_to_tree (sizetype, offset));
7418 else
7420 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7421 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7422 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7423 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7426 comp = fold_convert (type, comp);
7427 comp = force_gimple_operand (comp, &seq, false, NULL);
7428 gimple_seq_add_seq (&stmt_list, seq);
7429 if (gimple_code (use->stmt) != GIMPLE_PHI
7430 /* We can't allow re-allocating the stmt as it might be pointed
7431 to still. */
7432 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7433 >= gimple_num_ops (gsi_stmt (bsi))))
7435 comp = force_gimple_operand (comp, &seq, true, NULL);
7436 gimple_seq_add_seq (&stmt_list, seq);
7437 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7439 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7440 /* As this isn't a plain copy we have to reset alignment
7441 information. */
7442 if (SSA_NAME_PTR_INFO (comp))
7443 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7447 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7448 if (gimple_code (use->stmt) == GIMPLE_PHI)
7450 ass = gimple_build_assign (tgt, comp);
7451 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7453 bsi = gsi_for_stmt (use->stmt);
7454 remove_phi_node (&bsi, false);
7456 else
7458 gimple_assign_set_rhs_from_tree (&bsi, comp);
7459 use->stmt = gsi_stmt (bsi);
7463 /* Performs a peephole optimization to reorder the iv update statement with
7464 a mem ref to enable instruction combining in later phases. The mem ref uses
7465 the iv value before the update, so the reordering transformation requires
7466 adjustment of the offset. CAND is the selected IV_CAND.
7468 Example:
7470 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7471 iv2 = iv1 + 1;
7473 if (t < val) (1)
7474 goto L;
7475 goto Head;
7478 directly propagating t over to (1) would introduce an overlapping live range
7479 and thus increase register pressure. This peephole transforms it into:
7482 iv2 = iv1 + 1;
7483 t = MEM_REF (base, iv2, 8, 8);
7484 if (t < val)
7485 goto L;
7486 goto Head;
7489 static void
7490 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7492 tree var_after;
7493 gimple *iv_update, *stmt;
7494 basic_block bb;
7495 gimple_stmt_iterator gsi, gsi_iv;
7497 if (cand->pos != IP_NORMAL)
7498 return;
7500 var_after = cand->var_after;
7501 iv_update = SSA_NAME_DEF_STMT (var_after);
7503 bb = gimple_bb (iv_update);
7504 gsi = gsi_last_nondebug_bb (bb);
7505 stmt = gsi_stmt (gsi);
7507 /* Only handle conditional statement for now. */
7508 if (gimple_code (stmt) != GIMPLE_COND)
7509 return;
7511 gsi_prev_nondebug (&gsi);
7512 stmt = gsi_stmt (gsi);
7513 if (stmt != iv_update)
7514 return;
7516 gsi_prev_nondebug (&gsi);
7517 if (gsi_end_p (gsi))
7518 return;
7520 stmt = gsi_stmt (gsi);
7521 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7522 return;
7524 if (stmt != use->stmt)
7525 return;
7527 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7528 return;
7530 if (dump_file && (dump_flags & TDF_DETAILS))
7532 fprintf (dump_file, "Reordering \n");
7533 print_gimple_stmt (dump_file, iv_update, 0);
7534 print_gimple_stmt (dump_file, use->stmt, 0);
7535 fprintf (dump_file, "\n");
7538 gsi = gsi_for_stmt (use->stmt);
7539 gsi_iv = gsi_for_stmt (iv_update);
7540 gsi_move_before (&gsi_iv, &gsi);
7542 cand->pos = IP_BEFORE_USE;
7543 cand->incremented_at = use->stmt;
7546 /* Return the alias pointer type that should be used for a MEM_REF
7547 associated with USE, which has type USE_PTR_ADDRESS. */
7549 static tree
7550 get_alias_ptr_type_for_ptr_address (iv_use *use)
7552 gcall *call = as_a <gcall *> (use->stmt);
7553 switch (gimple_call_internal_fn (call))
7555 case IFN_MASK_LOAD:
7556 case IFN_MASK_STORE:
7557 case IFN_MASK_LOAD_LANES:
7558 case IFN_MASK_STORE_LANES:
7559 case IFN_MASK_LEN_LOAD_LANES:
7560 case IFN_MASK_LEN_STORE_LANES:
7561 case IFN_LEN_LOAD:
7562 case IFN_LEN_STORE:
7563 case IFN_MASK_LEN_LOAD:
7564 case IFN_MASK_LEN_STORE:
7565 /* The second argument contains the correct alias type. */
7566 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7567 return TREE_TYPE (gimple_call_arg (call, 1));
7569 default:
7570 gcc_unreachable ();
7575 /* Rewrites USE (address that is an iv) using candidate CAND. */
7577 static void
7578 rewrite_use_address (struct ivopts_data *data,
7579 struct iv_use *use, struct iv_cand *cand)
7581 aff_tree aff;
7582 bool ok;
7584 adjust_iv_update_pos (cand, use);
7585 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7586 gcc_assert (ok);
7587 unshare_aff_combination (&aff);
7589 /* To avoid undefined overflow problems, all IV candidates use unsigned
7590 integer types. The drawback is that this makes it impossible for
7591 create_mem_ref to distinguish an IV that is based on a memory object
7592 from one that represents simply an offset.
7594 To work around this problem, we pass a hint to create_mem_ref that
7595 indicates which variable (if any) in aff is an IV based on a memory
7596 object. Note that we only consider the candidate. If this is not
7597 based on an object, the base of the reference is in some subexpression
7598 of the use -- but these will use pointer types, so they are recognized
7599 by the create_mem_ref heuristics anyway. */
7600 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7601 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7602 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7603 tree type = use->mem_type;
7604 tree alias_ptr_type;
7605 if (use->type == USE_PTR_ADDRESS)
7606 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7607 else
7609 gcc_assert (type == TREE_TYPE (*use->op_p));
7610 unsigned int align = get_object_alignment (*use->op_p);
7611 if (align != TYPE_ALIGN (type))
7612 type = build_aligned_type (type, align);
7613 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7615 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7616 iv, base_hint, data->speed);
7618 if (use->type == USE_PTR_ADDRESS)
7620 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7621 ref = fold_convert (get_use_type (use), ref);
7622 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7623 true, GSI_SAME_STMT);
7625 else
7627 /* When we end up confused enough to have no suitable base but have
7628 stuffed everything into index2, use a LEA for the address and
7629 create a plain MEM_REF to avoid basing a memory reference
7630 on address zero, which create_mem_ref_raw does as a fallback. */
7631 if (TREE_CODE (ref) == TARGET_MEM_REF
7632 && TMR_INDEX2 (ref) != NULL_TREE
7633 && integer_zerop (TREE_OPERAND (ref, 0)))
7635 ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7636 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7637 true, GSI_SAME_STMT);
7638 ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7640 copy_ref_info (ref, *use->op_p);
7643 *use->op_p = ref;
7646 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7647 candidate CAND. */
7649 static void
7650 rewrite_use_compare (struct ivopts_data *data,
7651 struct iv_use *use, struct iv_cand *cand)
7653 tree comp, op, bound;
7654 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7655 enum tree_code compare;
7656 struct iv_group *group = data->vgroups[use->group_id];
7657 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7659 bound = cp->value;
7660 if (bound)
7662 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7663 tree var_type = TREE_TYPE (var);
7664 gimple_seq stmts;
7666 if (dump_file && (dump_flags & TDF_DETAILS))
7668 fprintf (dump_file, "Replacing exit test: ");
7669 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7671 compare = cp->comp;
7672 bound = unshare_expr (fold_convert (var_type, bound));
7673 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7674 if (stmts)
7675 gsi_insert_seq_on_edge_immediate (
7676 loop_preheader_edge (data->current_loop),
7677 stmts);
7679 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7680 gimple_cond_set_lhs (cond_stmt, var);
7681 gimple_cond_set_code (cond_stmt, compare);
7682 gimple_cond_set_rhs (cond_stmt, op);
7683 return;
7686 /* The induction variable elimination failed; just express the original
7687 giv. */
7688 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7689 gcc_assert (comp != NULL_TREE);
7690 gcc_assert (use->op_p != NULL);
7691 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7692 SSA_NAME_VAR (*use->op_p),
7693 true, GSI_SAME_STMT);
7696 /* Rewrite the groups using the selected induction variables. */
7698 static void
7699 rewrite_groups (struct ivopts_data *data)
7701 unsigned i, j;
7703 for (i = 0; i < data->vgroups.length (); i++)
7705 struct iv_group *group = data->vgroups[i];
7706 struct iv_cand *cand = group->selected;
7708 gcc_assert (cand);
7710 if (group->type == USE_NONLINEAR_EXPR)
7712 for (j = 0; j < group->vuses.length (); j++)
7714 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7715 update_stmt (group->vuses[j]->stmt);
7718 else if (address_p (group->type))
7720 for (j = 0; j < group->vuses.length (); j++)
7722 rewrite_use_address (data, group->vuses[j], cand);
7723 update_stmt (group->vuses[j]->stmt);
7726 else
7728 gcc_assert (group->type == USE_COMPARE);
7730 for (j = 0; j < group->vuses.length (); j++)
7732 rewrite_use_compare (data, group->vuses[j], cand);
7733 update_stmt (group->vuses[j]->stmt);
7739 /* Removes the ivs that are not used after rewriting. */
7741 static void
7742 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7744 unsigned j;
7745 bitmap_iterator bi;
7747 /* Figure out an order in which to release SSA DEFs so that we don't
7748 release something that we'd have to propagate into a debug stmt
7749 afterwards. */
7750 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7752 struct version_info *info;
7754 info = ver_info (data, j);
7755 if (info->iv
7756 && !integer_zerop (info->iv->step)
7757 && !info->inv_id
7758 && !info->iv->nonlin_use
7759 && !info->preserve_biv)
7761 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7763 tree def = info->iv->ssa_name;
7765 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7767 imm_use_iterator imm_iter;
7768 use_operand_p use_p;
7769 gimple *stmt;
7770 int count = 0;
7772 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7774 if (!gimple_debug_bind_p (stmt))
7775 continue;
7777 /* We just want to determine whether to do nothing
7778 (count == 0), to substitute the computed
7779 expression into a single use of the SSA DEF by
7780 itself (count == 1), or to use a debug temp
7781 because the SSA DEF is used multiple times or as
7782 part of a larger expression (count > 1). */
7783 count++;
7784 if (gimple_debug_bind_get_value (stmt) != def)
7785 count++;
7787 if (count > 1)
7788 break;
7791 if (!count)
7792 continue;
7794 struct iv_use dummy_use;
7795 struct iv_cand *best_cand = NULL, *cand;
7796 unsigned i, best_pref = 0, cand_pref;
7797 tree comp = NULL_TREE;
7799 memset (&dummy_use, 0, sizeof (dummy_use));
7800 dummy_use.iv = info->iv;
7801 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7803 cand = data->vgroups[i]->selected;
7804 if (cand == best_cand)
7805 continue;
7806 cand_pref = operand_equal_p (cand->iv->step,
7807 info->iv->step, 0)
7808 ? 4 : 0;
7809 cand_pref
7810 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7811 == TYPE_MODE (TREE_TYPE (info->iv->base))
7812 ? 2 : 0;
7813 cand_pref
7814 += TREE_CODE (cand->iv->base) == INTEGER_CST
7815 ? 1 : 0;
7816 if (best_cand == NULL || best_pref < cand_pref)
7818 tree this_comp
7819 = get_debug_computation_at (data->current_loop,
7820 SSA_NAME_DEF_STMT (def),
7821 &dummy_use, cand);
7822 if (this_comp)
7824 best_cand = cand;
7825 best_pref = cand_pref;
7826 comp = this_comp;
7831 if (!best_cand)
7832 continue;
7834 comp = unshare_expr (comp);
7835 if (count > 1)
7837 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7838 /* FIXME: Is setting the mode really necessary? */
7839 if (SSA_NAME_VAR (def))
7840 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7841 else
7842 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7843 gdebug *def_temp
7844 = gimple_build_debug_bind (vexpr, comp, NULL);
7845 gimple_stmt_iterator gsi;
7847 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7848 gsi = gsi_after_labels (gimple_bb
7849 (SSA_NAME_DEF_STMT (def)));
7850 else
7851 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7853 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7854 comp = vexpr;
7857 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7859 if (!gimple_debug_bind_p (stmt))
7860 continue;
7862 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7863 SET_USE (use_p, comp);
7865 update_stmt (stmt);
7872 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7873 for hash_map::traverse. */
7875 bool
7876 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7878 if (value)
7880 value->~tree_niter_desc ();
7881 free (value);
7883 return true;
7886 /* Frees data allocated by the optimization of a single loop. */
7888 static void
7889 free_loop_data (struct ivopts_data *data)
7891 unsigned i, j;
7892 bitmap_iterator bi;
7893 tree obj;
7895 if (data->niters)
7897 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7898 delete data->niters;
7899 data->niters = NULL;
7902 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7904 struct version_info *info;
7906 info = ver_info (data, i);
7907 info->iv = NULL;
7908 info->has_nonlin_use = false;
7909 info->preserve_biv = false;
7910 info->inv_id = 0;
7912 bitmap_clear (data->relevant);
7913 bitmap_clear (data->important_candidates);
7915 for (i = 0; i < data->vgroups.length (); i++)
7917 struct iv_group *group = data->vgroups[i];
7919 for (j = 0; j < group->vuses.length (); j++)
7920 free (group->vuses[j]);
7921 group->vuses.release ();
7923 BITMAP_FREE (group->related_cands);
7924 for (j = 0; j < group->n_map_members; j++)
7926 if (group->cost_map[j].inv_vars)
7927 BITMAP_FREE (group->cost_map[j].inv_vars);
7928 if (group->cost_map[j].inv_exprs)
7929 BITMAP_FREE (group->cost_map[j].inv_exprs);
7932 free (group->cost_map);
7933 free (group);
7935 data->vgroups.truncate (0);
7937 for (i = 0; i < data->vcands.length (); i++)
7939 struct iv_cand *cand = data->vcands[i];
7941 if (cand->inv_vars)
7942 BITMAP_FREE (cand->inv_vars);
7943 if (cand->inv_exprs)
7944 BITMAP_FREE (cand->inv_exprs);
7945 free (cand);
7947 data->vcands.truncate (0);
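/* SSA names may have been created while this loop was processed; if so,
   re-allocate the version_info array at twice the current number of SSA
   names (zero-initialized) so ver_info stays in bounds for the next loop.  */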
7949 if (data->version_info_size < num_ssa_names)
7951 data->version_info_size = 2 * num_ssa_names;
7952 free (data->version_info);
7953 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7956 data->max_inv_var_id = 0;
7957 data->max_inv_expr_id = 0;
7959 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7960 SET_DECL_RTL (obj, NULL_RTX);
7962 decl_rtl_to_reset.truncate (0);
7964 data->inv_expr_tab->empty ();
7966 data->iv_common_cand_tab->empty ();
7967 data->iv_common_cands.truncate (0);
7970 /* Finalizes data structures used by the iv optimization pass. */
7973 static void
7974 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7976 free_loop_data (data);
7977 free (data->version_info);
7978 BITMAP_FREE (data->relevant);
7979 BITMAP_FREE (data->important_candidates);
7981 decl_rtl_to_reset.release ();
7982 data->vgroups.release ();
7983 data->vcands.release ();
7984 delete data->inv_expr_tab;
7985 data->inv_expr_tab = NULL;
7986 free_affine_expand_cache (&data->name_expansion_cache);
7987 if (data->base_object_map)
7988 delete data->base_object_map;
7989 delete data->iv_common_cand_tab;
7990 data->iv_common_cand_tab = NULL;
7991 data->iv_common_cands.release ();
7992 obstack_free (&data->iv_obstack, NULL);
7995 /* Returns true if the loop body BODY includes any function calls, ignoring internal calls and inexpensive builtins. */
7997 static bool
7998 loop_body_includes_call (basic_block *body, unsigned num_nodes)
8000 gimple_stmt_iterator gsi;
8001 unsigned i;
8003 for (i = 0; i < num_nodes; i++)
8004 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
8006 gimple *stmt = gsi_stmt (gsi);
8007 if (is_gimple_call (stmt)
8008 && !gimple_call_internal_p (stmt)
8009 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
8010 return true;
8012 return false;
8015 /* Determine cost scaling factor for basic blocks in loop. */
8016 #define COST_SCALING_FACTOR_BOUND (20)
8018 static void
8019 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8021 int lfreq = data->current_loop->header->count.to_frequency (cfun);
8022 if (!data->speed || lfreq <= 0)
8023 return;
8025 int max_freq = lfreq;
8026 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8028 body[i]->aux = (void *)(intptr_t) 1;
8029 if (max_freq < body[i]->count.to_frequency (cfun))
8030 max_freq = body[i]->count.to_frequency (cfun);
8032 if (max_freq > lfreq)
8034 int divisor, factor;
8035 /* Check if scaling factor itself needs to be scaled by the bound. This
8036 is to avoid overflow when scaling cost according to profile info. */
8037 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8039 divisor = max_freq;
8040 factor = COST_SCALING_FACTOR_BOUND;
8042 else
8044 divisor = lfreq;
8045 factor = 1;
8047 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8049 int bfreq = body[i]->count.to_frequency (cfun);
8050 if (bfreq <= lfreq)
8051 continue;
8053 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
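/* A worked example with illustrative numbers: if the header frequency is
   10, the hottest block's frequency is 1000 and COST_SCALING_FACTOR_BOUND
   is 20, then max_freq / lfreq = 100 exceeds the bound, so divisor = 1000
   and factor = 20; a block with frequency 200 gets aux = 20 * 200 / 1000
   = 4, i.e. its cost scaling factor becomes 4 rather than the raw ratio 20.  */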
8058 /* Find the doloop comparison use and set its doloop_p flag if found. */
8060 static bool
8061 find_doloop_use (struct ivopts_data *data)
8063 struct loop *loop = data->current_loop;
8065 for (unsigned i = 0; i < data->vgroups.length (); i++)
8067 struct iv_group *group = data->vgroups[i];
8068 if (group->type == USE_COMPARE)
8070 gcc_assert (group->vuses.length () == 1);
8071 struct iv_use *use = group->vuses[0];
8072 gimple *stmt = use->stmt;
8073 if (gimple_code (stmt) == GIMPLE_COND)
8075 basic_block bb = gimple_bb (stmt);
8076 edge true_edge, false_edge;
8077 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8078 /* This comparison controls whether we branch to the loop latch. Require
8079 the latch to be empty for now. */
8080 if ((loop->latch == true_edge->dest
8081 || loop->latch == false_edge->dest)
8082 && empty_block_p (loop->latch))
8084 group->doloop_p = true;
8085 if (dump_file && (dump_flags & TDF_DETAILS))
8087 fprintf (dump_file, "Doloop cmp iv use: ");
8088 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8090 return true;
8096 return false;
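/* For example (an illustrative source-level loop, not specific to this
   function): a counted loop written as

     do
       *p++ = 0;
     while (--n);

   normally ends in a single GIMPLE_COND on the counter feeding an empty
   latch, which is the shape accepted above and the shape the RTL doloop
   pass can later turn into a low-overhead count-register loop.  */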
8099 /* For targets which support doloop, predict whether the RTL doloop
8100 transformation is likely to be performed on this loop later; if so, find
8101 the doloop compare use and mark the doloop_use_p flag. */
8103 void
8104 analyze_and_mark_doloop_use (struct ivopts_data *data)
8106 data->doloop_use_p = false;
8108 if (!flag_branch_on_count_reg)
8109 return;
8111 if (data->current_loop->unroll == USHRT_MAX)
8112 return;
8114 if (!generic_predict_doloop_p (data))
8115 return;
8117 if (find_doloop_use (data))
8119 data->doloop_use_p = true;
8120 if (dump_file && (dump_flags & TDF_DETAILS))
8122 struct loop *loop = data->current_loop;
8123 fprintf (dump_file,
8124 "Predict loop %d can perform"
8125 " doloop optimization later.\n",
8126 loop->num);
8127 flow_loop_dump (loop, dump_file, NULL, 1);
8132 /* Optimizes the LOOP. Returns true if anything changed. */
8134 static bool
8135 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8136 bitmap toremove)
8138 bool changed = false;
8139 class iv_ca *iv_ca;
8140 edge exit = single_dom_exit (loop);
8141 basic_block *body;
8143 gcc_assert (!data->niters);
8144 data->current_loop = loop;
8145 data->loop_loc = find_loop_location (loop).get_location_t ();
8146 data->speed = optimize_loop_for_speed_p (loop);
8148 if (dump_file && (dump_flags & TDF_DETAILS))
8150 fprintf (dump_file, "Processing loop %d", loop->num);
8151 if (data->loop_loc != UNKNOWN_LOCATION)
8152 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8153 LOCATION_LINE (data->loop_loc));
8154 fprintf (dump_file, "\n");
8156 if (exit)
8158 fprintf (dump_file, " single exit %d -> %d, exit condition ",
8159 exit->src->index, exit->dest->index);
8160 print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8161 0, TDF_SLIM);
8162 fprintf (dump_file, "\n");
8165 fprintf (dump_file, "\n");
8168 body = get_loop_body (loop);
8169 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8170 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8172 data->loop_single_exit_p
8173 = exit != NULL && loop_only_exit_p (loop, body, exit);
8175 /* For each ssa name determines whether it behaves as an induction variable
8176 in some loop. */
8177 if (!find_induction_variables (data, body))
8178 goto finish;
8180 /* Finds interesting uses (item 1). */
8181 find_interesting_uses (data, body);
8182 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8183 goto finish;
8185 /* Determine cost scaling factor for basic blocks in loop. */
8186 determine_scaling_factor (data, body);
8188 /* Analyze doloop possibility and mark the doloop use if predicted. */
8189 analyze_and_mark_doloop_use (data);
8191 /* Finds candidates for the induction variables (item 2). */
8192 find_iv_candidates (data);
8194 /* Calculates the costs (item 3, part 1). */
8195 determine_iv_costs (data);
8196 determine_group_iv_costs (data);
8197 determine_set_costs (data);
8199 /* Find the optimal set of induction variables (item 3, part 2). */
8200 iv_ca = find_optimal_iv_set (data);
8201 /* Cleanup basic block aux field. */
8202 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8203 body[i]->aux = NULL;
8204 if (!iv_ca)
8205 goto finish;
8206 changed = true;
8208 /* Create the new induction variables (item 4, part 1). */
8209 create_new_ivs (data, iv_ca);
8210 iv_ca_free (&iv_ca);
8212 /* Rewrite the uses (item 4, part 2). */
8213 rewrite_groups (data);
8215 /* Remove the ivs that are unused after rewriting. */
8216 remove_unused_ivs (data, toremove);
8218 finish:
8219 free (body);
8220 free_loop_data (data);
8222 return changed;
8225 /* Main entry point. Optimizes induction variables in loops. */
8227 void
8228 tree_ssa_iv_optimize (void)
8230 struct ivopts_data data;
8231 auto_bitmap toremove;
8233 tree_ssa_iv_optimize_init (&data);
8234 mark_ssa_maybe_undefs ();
8236 /* Optimize the loops starting with the innermost ones. */
8237 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8239 if (!dbg_cnt (ivopts_loop))
8240 continue;
8242 if (dump_file && (dump_flags & TDF_DETAILS))
8243 flow_loop_dump (loop, dump_file, NULL, 1);
8245 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8248 /* Remove eliminated IV defs. */
8249 release_defs_bitset (toremove);
8251 /* We have changed the structure of induction variables; it might happen
8252 that definitions in the scev database refer to some of them that were
8253 eliminated. */
8254 scev_reset_htab ();
8255 /* Likewise niter and control-IV information. */
8256 free_numbers_of_iterations_estimates (cfun);
8258 tree_ssa_iv_optimize_finalize (&data);
8261 #include "gt-tree-ssa-loop-ivopts.h"