Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / gcc4 / gcc / tree-ssa-loop-ivopts.c
blob46206f702eda4e934c58530475161de0ebe23789
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
151 /* Use. */
152 struct iv_use
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
195 biv. */
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
201 DEF_VEC_P(iv_use_p);
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
208 struct ivopts_data
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
214 htab_t niters;
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
223 bitmap relevant;
225 /* The maximum invariant id. */
226 unsigned max_inv_id;
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
238 use. */
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
244 struct iv_ca
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
259 bitmap cands;
261 /* The number of candidates in the set. */
262 unsigned n_cands;
264 /* Total number of registers needed. */
265 unsigned n_regs;
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
271 unsigned cand_cost;
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
277 unsigned cost;
280 /* Difference of two iv candidate assignments. */
282 struct iv_ca_delta
284 /* Changed use. */
285 struct iv_use *use;
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
315 here. */
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
356 return loop->aux;
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
361 edge
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
366 if (!exit)
367 return NULL;
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
372 return exit;
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
378 void
379 dump_iv (FILE *file, struct iv *iv)
381 if (iv->ssa_name)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
392 if (iv->step)
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
402 else
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
409 if (iv->base_object)
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
423 void
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
428 switch (use->type)
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
446 default:
447 gcc_unreachable ();
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
474 unsigned i;
475 struct iv_use *use;
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
503 if (!iv)
505 fprintf (file, " final value replacement\n");
506 return;
509 switch (cand->pos)
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
524 dump_iv (file, iv);
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
544 2^BITS. */
546 static bool
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548 HOST_WIDE_INT *x)
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
552 unsigned i;
554 a &= mask;
555 b &= mask;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
560 a >>= 1;
561 b >>= 1;
562 bits--;
563 mask >>= 1;
566 if (!(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
569 return false;
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
574 inv = 1;
575 ex = b;
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
587 val |= ~mask;
589 *x = val;
591 return true;
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
597 static bool
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
602 gcc_assert (bb);
604 if (sbb == loop->latch)
605 return true;
607 if (sbb != bb)
608 return false;
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
616 static bool
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624 return false;
626 if (stmt_bb != cand_bb)
627 return true;
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
634 return false;
635 if (bsi_stmt (bsi) == stmt)
636 return true;
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
643 static bool
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
646 switch (cand->pos)
648 case IP_END:
649 return false;
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
657 default:
658 gcc_unreachable ();
662 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
664 static bool
665 abnormal_ssa_name_p (tree exp)
667 if (!exp)
668 return false;
670 if (TREE_CODE (exp) != SSA_NAME)
671 return false;
673 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
676 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
677 abnormal phi node. Callback for for_each_index. */
679 static bool
680 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
681 void *data ATTRIBUTE_UNUSED)
683 if (TREE_CODE (base) == ARRAY_REF)
685 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
686 return false;
687 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
688 return false;
691 return !abnormal_ssa_name_p (*index);
694 /* Returns true if EXPR contains a ssa name that occurs in an
695 abnormal phi node. */
697 bool
698 contains_abnormal_ssa_name_p (tree expr)
700 enum tree_code code;
701 enum tree_code_class class;
703 if (!expr)
704 return false;
706 code = TREE_CODE (expr);
707 class = TREE_CODE_CLASS (code);
709 if (code == SSA_NAME)
710 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
712 if (code == INTEGER_CST
713 || is_gimple_min_invariant (expr))
714 return false;
716 if (code == ADDR_EXPR)
717 return !for_each_index (&TREE_OPERAND (expr, 0),
718 idx_contains_abnormal_ssa_name_p,
719 NULL);
721 switch (class)
723 case tcc_binary:
724 case tcc_comparison:
725 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
726 return true;
728 /* Fallthru. */
729 case tcc_unary:
730 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
731 return true;
733 break;
735 default:
736 gcc_unreachable ();
739 return false;
742 /* Element of the table in that we cache the numbers of iterations obtained
743 from exits of the loop. */
745 struct nfe_cache_elt
747 /* The edge for that the number of iterations is cached. */
748 edge exit;
750 /* Number of iterations corresponding to this exit, or NULL if it cannot be
751 determined. */
752 tree niter;
755 /* Hash function for nfe_cache_elt E. */
757 static hashval_t
758 nfe_hash (const void *e)
760 const struct nfe_cache_elt *elt = e;
762 return htab_hash_pointer (elt->exit);
765 /* Equality function for nfe_cache_elt E1 and edge E2. */
767 static int
768 nfe_eq (const void *e1, const void *e2)
770 const struct nfe_cache_elt *elt1 = e1;
772 return elt1->exit == e2;
775 /* Returns tree describing number of iterations determined from
776 EXIT of DATA->current_loop, or NULL if something goes wrong. */
778 static tree
779 niter_for_exit (struct ivopts_data *data, edge exit)
781 struct nfe_cache_elt *nfe_desc;
782 struct tree_niter_desc desc;
783 PTR *slot;
785 slot = htab_find_slot_with_hash (data->niters, exit,
786 htab_hash_pointer (exit),
787 INSERT);
789 if (!*slot)
791 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
792 nfe_desc->exit = exit;
794 /* Try to determine number of iterations. We must know it
795 unconditionally (i.e., without possibility of # of iterations
796 being zero). Also, we cannot safely work with ssa names that
797 appear in phi nodes on abnormal edges, so that we do not create
798 overlapping life ranges for them (PR 27283). */
799 if (number_of_iterations_exit (data->current_loop,
800 exit, &desc, true)
801 && zero_p (desc.may_be_zero)
802 && !contains_abnormal_ssa_name_p (desc.niter))
803 nfe_desc->niter = desc.niter;
804 else
805 nfe_desc->niter = NULL_TREE;
807 else
808 nfe_desc = *slot;
810 return nfe_desc->niter;
813 /* Returns tree describing number of iterations determined from
814 single dominating exit of DATA->current_loop, or NULL if something
815 goes wrong. */
817 static tree
818 niter_for_single_dom_exit (struct ivopts_data *data)
820 edge exit = single_dom_exit (data->current_loop);
822 if (!exit)
823 return NULL;
825 return niter_for_exit (data, exit);
828 /* Initializes data structures used by the iv optimization pass, stored
829 in DATA. LOOPS is the loop tree. */
831 static void
832 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
834 unsigned i;
836 data->version_info_size = 2 * num_ssa_names;
837 data->version_info = xcalloc (data->version_info_size,
838 sizeof (struct version_info));
839 data->relevant = BITMAP_ALLOC (NULL);
840 data->important_candidates = BITMAP_ALLOC (NULL);
841 data->max_inv_id = 0;
842 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
844 for (i = 1; i < loops->num; i++)
845 if (loops->parray[i])
846 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
848 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
849 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
850 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
853 /* Returns a memory object to that EXPR points. In case we are able to
854 determine that it does not point to any such object, NULL is returned. */
856 static tree
857 determine_base_object (tree expr)
859 enum tree_code code = TREE_CODE (expr);
860 tree base, obj, op0, op1;
862 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
863 return NULL_TREE;
865 switch (code)
867 case INTEGER_CST:
868 return NULL_TREE;
870 case ADDR_EXPR:
871 obj = TREE_OPERAND (expr, 0);
872 base = get_base_address (obj);
874 if (!base)
875 return expr;
877 if (TREE_CODE (base) == INDIRECT_REF)
878 return determine_base_object (TREE_OPERAND (base, 0));
880 return fold_convert (ptr_type_node,
881 build_fold_addr_expr (base));
883 case PLUS_EXPR:
884 case MINUS_EXPR:
885 op0 = determine_base_object (TREE_OPERAND (expr, 0));
886 op1 = determine_base_object (TREE_OPERAND (expr, 1));
888 if (!op1)
889 return op0;
891 if (!op0)
892 return (code == PLUS_EXPR
893 ? op1
894 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
896 return fold_build2 (code, ptr_type_node, op0, op1);
898 case NOP_EXPR:
899 case CONVERT_EXPR:
900 return determine_base_object (TREE_OPERAND (expr, 0));
902 default:
903 return fold_convert (ptr_type_node, expr);
907 /* Allocates an induction variable with given initial value BASE and step STEP
908 for loop LOOP. */
910 static struct iv *
911 alloc_iv (tree base, tree step)
913 struct iv *iv = xcalloc (1, sizeof (struct iv));
915 if (step && integer_zerop (step))
916 step = NULL_TREE;
918 iv->base = base;
919 iv->base_object = determine_base_object (base);
920 iv->step = step;
921 iv->biv_p = false;
922 iv->have_use_for = false;
923 iv->use_id = 0;
924 iv->ssa_name = NULL_TREE;
926 return iv;
929 /* Sets STEP and BASE for induction variable IV. */
931 static void
932 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
934 struct version_info *info = name_info (data, iv);
936 gcc_assert (!info->iv);
938 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
939 info->iv = alloc_iv (base, step);
940 info->iv->ssa_name = iv;
943 /* Finds induction variable declaration for VAR. */
945 static struct iv *
946 get_iv (struct ivopts_data *data, tree var)
948 basic_block bb;
950 if (!name_info (data, var)->iv)
952 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
954 if (!bb
955 || !flow_bb_inside_loop_p (data->current_loop, bb))
956 set_iv (data, var, var, NULL_TREE);
959 return name_info (data, var)->iv;
962 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
963 not define a simple affine biv with nonzero step. */
965 static tree
966 determine_biv_step (tree phi)
968 struct loop *loop = bb_for_stmt (phi)->loop_father;
969 tree name = PHI_RESULT (phi);
970 affine_iv iv;
972 if (!is_gimple_reg (name))
973 return NULL_TREE;
975 if (!simple_iv (loop, phi, name, &iv, true))
976 return NULL_TREE;
978 return (zero_p (iv.step) ? NULL_TREE : iv.step);
981 /* Finds basic ivs. */
983 static bool
984 find_bivs (struct ivopts_data *data)
986 tree phi, step, type, base;
987 bool found = false;
988 struct loop *loop = data->current_loop;
990 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
992 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
993 continue;
995 step = determine_biv_step (phi);
996 if (!step)
997 continue;
999 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1000 base = expand_simple_operations (base);
1001 if (contains_abnormal_ssa_name_p (base)
1002 || contains_abnormal_ssa_name_p (step))
1003 continue;
1005 type = TREE_TYPE (PHI_RESULT (phi));
1006 base = fold_convert (type, base);
1007 if (step)
1008 step = fold_convert (type, step);
1010 set_iv (data, PHI_RESULT (phi), base, step);
1011 found = true;
1014 return found;
1017 /* Marks basic ivs. */
1019 static void
1020 mark_bivs (struct ivopts_data *data)
1022 tree phi, var;
1023 struct iv *iv, *incr_iv;
1024 struct loop *loop = data->current_loop;
1025 basic_block incr_bb;
1027 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1029 iv = get_iv (data, PHI_RESULT (phi));
1030 if (!iv)
1031 continue;
1033 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1034 incr_iv = get_iv (data, var);
1035 if (!incr_iv)
1036 continue;
1038 /* If the increment is in the subloop, ignore it. */
1039 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1040 if (incr_bb->loop_father != data->current_loop
1041 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1042 continue;
1044 iv->biv_p = true;
1045 incr_iv->biv_p = true;
1049 /* Checks whether STMT defines a linear induction variable and stores its
1050 parameters to IV. */
1052 static bool
1053 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1055 tree lhs;
1056 struct loop *loop = data->current_loop;
1058 iv->base = NULL_TREE;
1059 iv->step = NULL_TREE;
1061 if (TREE_CODE (stmt) != MODIFY_EXPR)
1062 return false;
1064 lhs = TREE_OPERAND (stmt, 0);
1065 if (TREE_CODE (lhs) != SSA_NAME)
1066 return false;
1068 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1069 return false;
1070 iv->base = expand_simple_operations (iv->base);
1072 if (contains_abnormal_ssa_name_p (iv->base)
1073 || contains_abnormal_ssa_name_p (iv->step))
1074 return false;
1076 return true;
1079 /* Finds general ivs in statement STMT. */
1081 static void
1082 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1084 affine_iv iv;
1086 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1087 return;
1089 set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1092 /* Finds general ivs in basic block BB. */
1094 static void
1095 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1097 block_stmt_iterator bsi;
1099 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1100 find_givs_in_stmt (data, bsi_stmt (bsi));
1103 /* Finds general ivs. */
1105 static void
1106 find_givs (struct ivopts_data *data)
1108 struct loop *loop = data->current_loop;
1109 basic_block *body = get_loop_body_in_dom_order (loop);
1110 unsigned i;
1112 for (i = 0; i < loop->num_nodes; i++)
1113 find_givs_in_bb (data, body[i]);
1114 free (body);
1117 /* For each ssa name defined in LOOP determines whether it is an induction
1118 variable and if so, its initial value and step. */
1120 static bool
1121 find_induction_variables (struct ivopts_data *data)
1123 unsigned i;
1124 bitmap_iterator bi;
1126 if (!find_bivs (data))
1127 return false;
1129 find_givs (data);
1130 mark_bivs (data);
1132 if (dump_file && (dump_flags & TDF_DETAILS))
1134 tree niter = niter_for_single_dom_exit (data);
1136 if (niter)
1138 fprintf (dump_file, " number of iterations ");
1139 print_generic_expr (dump_file, niter, TDF_SLIM);
1140 fprintf (dump_file, "\n\n");
1143 fprintf (dump_file, "Induction variables:\n\n");
1145 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1147 if (ver_info (data, i)->iv)
1148 dump_iv (dump_file, ver_info (data, i)->iv);
1152 return true;
1155 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1157 static struct iv_use *
1158 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1159 tree stmt, enum use_type use_type)
1161 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1163 use->id = n_iv_uses (data);
1164 use->type = use_type;
1165 use->iv = iv;
1166 use->stmt = stmt;
1167 use->op_p = use_p;
1168 use->related_cands = BITMAP_ALLOC (NULL);
1170 /* To avoid showing ssa name in the dumps, if it was not reset by the
1171 caller. */
1172 iv->ssa_name = NULL_TREE;
1174 if (dump_file && (dump_flags & TDF_DETAILS))
1175 dump_use (dump_file, use);
1177 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1179 return use;
1182 /* Checks whether OP is a loop-level invariant and if so, records it.
1183 NONLINEAR_USE is true if the invariant is used in a way we do not
1184 handle specially. */
1186 static void
1187 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1189 basic_block bb;
1190 struct version_info *info;
1192 if (TREE_CODE (op) != SSA_NAME
1193 || !is_gimple_reg (op))
1194 return;
1196 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1197 if (bb
1198 && flow_bb_inside_loop_p (data->current_loop, bb))
1199 return;
1201 info = name_info (data, op);
1202 info->name = op;
1203 info->has_nonlin_use |= nonlinear_use;
1204 if (!info->inv_id)
1205 info->inv_id = ++data->max_inv_id;
1206 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1209 /* Checks whether the use OP is interesting and if so, records it
1210 as TYPE. */
1212 static struct iv_use *
1213 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1214 enum use_type type)
1216 struct iv *iv;
1217 struct iv *civ;
1218 tree stmt;
1219 struct iv_use *use;
1221 if (TREE_CODE (op) != SSA_NAME)
1222 return NULL;
1224 iv = get_iv (data, op);
1225 if (!iv)
1226 return NULL;
1228 if (iv->have_use_for)
1230 use = iv_use (data, iv->use_id);
1232 gcc_assert (use->type == USE_NONLINEAR_EXPR
1233 || use->type == USE_OUTER);
1235 if (type == USE_NONLINEAR_EXPR)
1236 use->type = USE_NONLINEAR_EXPR;
1237 return use;
1240 if (zero_p (iv->step))
1242 record_invariant (data, op, true);
1243 return NULL;
1245 iv->have_use_for = true;
1247 civ = xmalloc (sizeof (struct iv));
1248 *civ = *iv;
1250 stmt = SSA_NAME_DEF_STMT (op);
1251 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1252 || TREE_CODE (stmt) == MODIFY_EXPR);
1254 use = record_use (data, NULL, civ, stmt, type);
1255 iv->use_id = use->id;
1257 return use;
1260 /* Checks whether the use OP is interesting and if so, records it. */
1262 static struct iv_use *
1263 find_interesting_uses_op (struct ivopts_data *data, tree op)
1265 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1268 /* Records a definition of induction variable OP that is used outside of the
1269 loop. */
1271 static struct iv_use *
1272 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1274 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1277 /* Checks whether the condition *COND_P in STMT is interesting
1278 and if so, records it. */
1280 static void
1281 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1283 tree *op0_p;
1284 tree *op1_p;
1285 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1286 struct iv const_iv;
1287 tree zero = integer_zero_node;
1289 const_iv.step = NULL_TREE;
1291 if (TREE_CODE (*cond_p) != SSA_NAME
1292 && !COMPARISON_CLASS_P (*cond_p))
1293 return;
1295 if (TREE_CODE (*cond_p) == SSA_NAME)
1297 op0_p = cond_p;
1298 op1_p = &zero;
1300 else
1302 op0_p = &TREE_OPERAND (*cond_p, 0);
1303 op1_p = &TREE_OPERAND (*cond_p, 1);
1306 if (TREE_CODE (*op0_p) == SSA_NAME)
1307 iv0 = get_iv (data, *op0_p);
1308 else
1309 iv0 = &const_iv;
1311 if (TREE_CODE (*op1_p) == SSA_NAME)
1312 iv1 = get_iv (data, *op1_p);
1313 else
1314 iv1 = &const_iv;
1316 if (/* When comparing with non-invariant value, we may not do any senseful
1317 induction variable elimination. */
1318 (!iv0 || !iv1)
1319 /* Eliminating condition based on two ivs would be nontrivial.
1320 ??? TODO -- it is not really important to handle this case. */
1321 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1323 find_interesting_uses_op (data, *op0_p);
1324 find_interesting_uses_op (data, *op1_p);
1325 return;
1328 if (zero_p (iv0->step) && zero_p (iv1->step))
1330 /* If both are invariants, this is a work for unswitching. */
1331 return;
1334 civ = xmalloc (sizeof (struct iv));
1335 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1336 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1339 /* Returns true if expression EXPR is obviously invariant in LOOP,
1340 i.e. if all its operands are defined outside of the LOOP. */
1342 bool
1343 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1345 basic_block def_bb;
1346 unsigned i, len;
1348 if (is_gimple_min_invariant (expr))
1349 return true;
1351 if (TREE_CODE (expr) == SSA_NAME)
1353 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1354 if (def_bb
1355 && flow_bb_inside_loop_p (loop, def_bb))
1356 return false;
1358 return true;
1361 if (!EXPR_P (expr))
1362 return false;
1364 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1365 for (i = 0; i < len; i++)
1366 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1367 return false;
1369 return true;
1372 /* Cumulates the steps of indices into DATA and replaces their values with the
1373 initial ones. Returns false when the value of the index cannot be determined.
1374 Callback for for_each_index. */
1376 struct ifs_ivopts_data
1378 struct ivopts_data *ivopts_data;
1379 tree stmt;
1380 tree *step_p;
1383 static bool
1384 idx_find_step (tree base, tree *idx, void *data)
1386 struct ifs_ivopts_data *dta = data;
1387 struct iv *iv;
1388 tree step, iv_base, iv_step, lbound, off;
1389 struct loop *loop = dta->ivopts_data->current_loop;
1391 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1392 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1393 return false;
1395 /* If base is a component ref, require that the offset of the reference
1396 be invariant. */
1397 if (TREE_CODE (base) == COMPONENT_REF)
1399 off = component_ref_field_offset (base);
1400 return expr_invariant_in_loop_p (loop, off);
1403 /* If base is array, first check whether we will be able to move the
1404 reference out of the loop (in order to take its address in strength
1405 reduction). In order for this to work we need both lower bound
1406 and step to be loop invariants. */
1407 if (TREE_CODE (base) == ARRAY_REF)
1409 step = array_ref_element_size (base);
1410 lbound = array_ref_low_bound (base);
1412 if (!expr_invariant_in_loop_p (loop, step)
1413 || !expr_invariant_in_loop_p (loop, lbound))
1414 return false;
1417 if (TREE_CODE (*idx) != SSA_NAME)
1418 return true;
1420 iv = get_iv (dta->ivopts_data, *idx);
1421 if (!iv)
1422 return false;
1424 *idx = iv->base;
1426 if (!iv->step)
1427 return true;
1429 if (TREE_CODE (base) == ARRAY_REF)
1431 step = array_ref_element_size (base);
1433 /* We only handle addresses whose step is an integer constant. */
1434 if (TREE_CODE (step) != INTEGER_CST)
1435 return false;
1437 else
1438 /* The step for pointer arithmetics already is 1 byte. */
1439 step = build_int_cst (sizetype, 1);
1441 iv_base = iv->base;
1442 iv_step = iv->step;
1443 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1444 sizetype, &iv_base, &iv_step, dta->stmt,
1445 false))
1447 /* The index might wrap. */
1448 return false;
1451 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1453 if (!*dta->step_p)
1454 *dta->step_p = step;
1455 else
1456 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1458 return true;
1461 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1462 object is passed to it in DATA. */
1464 static bool
1465 idx_record_use (tree base, tree *idx,
1466 void *data)
1468 find_interesting_uses_op (data, *idx);
1469 if (TREE_CODE (base) == ARRAY_REF)
1471 find_interesting_uses_op (data, array_ref_element_size (base));
1472 find_interesting_uses_op (data, array_ref_low_bound (base));
1474 return true;
1477 /* Returns true if memory reference REF may be unaligned. */
1479 static bool
1480 may_be_unaligned_p (tree ref)
1482 tree base;
1483 tree base_type;
1484 HOST_WIDE_INT bitsize;
1485 HOST_WIDE_INT bitpos;
1486 tree toffset;
1487 enum machine_mode mode;
1488 int unsignedp, volatilep;
1489 unsigned base_align;
1491 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1492 thus they are not misaligned. */
1493 if (TREE_CODE (ref) == TARGET_MEM_REF)
1494 return false;
1496 /* The test below is basically copy of what expr.c:normal_inner_ref
1497 does to check whether the object must be loaded by parts when
1498 STRICT_ALIGNMENT is true. */
1499 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1500 &unsignedp, &volatilep, true);
1501 base_type = TREE_TYPE (base);
1502 base_align = TYPE_ALIGN (base_type);
1504 if (mode != BLKmode
1505 && (base_align < GET_MODE_ALIGNMENT (mode)
1506 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1507 || bitpos % BITS_PER_UNIT != 0))
1508 return true;
1510 return false;
1513 /* Finds addresses in *OP_P inside STMT. */
1515 static void
1516 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1518 tree base = *op_p, step = NULL;
1519 struct iv *civ;
1520 struct ifs_ivopts_data ifs_ivopts_data;
1522 /* Do not play with volatile memory references. A bit too conservative,
1523 perhaps, but safe. */
1524 if (stmt_ann (stmt)->has_volatile_ops)
1525 goto fail;
1527 /* Ignore bitfields for now. Not really something terribly complicated
1528 to handle. TODO. */
1529 if (TREE_CODE (base) == BIT_FIELD_REF
1530 || (TREE_CODE (base) == COMPONENT_REF
1531 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
1532 goto fail;
1534 if (STRICT_ALIGNMENT
1535 && may_be_unaligned_p (base))
1536 goto fail;
1538 base = unshare_expr (base);
1540 if (TREE_CODE (base) == TARGET_MEM_REF)
1542 tree type = build_pointer_type (TREE_TYPE (base));
1543 tree astep;
1545 if (TMR_BASE (base)
1546 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1548 civ = get_iv (data, TMR_BASE (base));
1549 if (!civ)
1550 goto fail;
1552 TMR_BASE (base) = civ->base;
1553 step = civ->step;
1555 if (TMR_INDEX (base)
1556 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1558 civ = get_iv (data, TMR_INDEX (base));
1559 if (!civ)
1560 goto fail;
1562 TMR_INDEX (base) = civ->base;
1563 astep = civ->step;
1565 if (astep)
1567 if (TMR_STEP (base))
1568 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1570 if (step)
1571 step = fold_build2 (PLUS_EXPR, type, step, astep);
1572 else
1573 step = astep;
1577 if (zero_p (step))
1578 goto fail;
1579 base = tree_mem_ref_addr (type, base);
1581 else
1583 ifs_ivopts_data.ivopts_data = data;
1584 ifs_ivopts_data.stmt = stmt;
1585 ifs_ivopts_data.step_p = &step;
1586 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1587 || zero_p (step))
1588 goto fail;
1590 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1591 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1593 base = build_fold_addr_expr (base);
1596 civ = alloc_iv (base, step);
1597 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1598 return;
1600 fail:
1601 for_each_index (op_p, idx_record_use, data);
1604 /* Finds and records invariants used in STMT. */
1606 static void
1607 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1609 ssa_op_iter iter;
1610 use_operand_p use_p;
1611 tree op;
1613 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1615 op = USE_FROM_PTR (use_p);
1616 record_invariant (data, op, false);
1620 /* Finds interesting uses of induction variables in the statement STMT. */
1622 static void
1623 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1625 struct iv *iv;
1626 tree op, lhs, rhs;
1627 ssa_op_iter iter;
1628 use_operand_p use_p;
1630 find_invariants_stmt (data, stmt);
1632 if (TREE_CODE (stmt) == COND_EXPR)
1634 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1635 return;
1638 if (TREE_CODE (stmt) == MODIFY_EXPR)
1640 lhs = TREE_OPERAND (stmt, 0);
1641 rhs = TREE_OPERAND (stmt, 1);
1643 if (TREE_CODE (lhs) == SSA_NAME)
1645 /* If the statement defines an induction variable, the uses are not
1646 interesting by themselves. */
1648 iv = get_iv (data, lhs);
1650 if (iv && !zero_p (iv->step))
1651 return;
1654 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1656 case tcc_comparison:
1657 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1658 return;
1660 case tcc_reference:
1661 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1662 if (REFERENCE_CLASS_P (lhs))
1663 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1664 return;
1666 default: ;
1669 if (REFERENCE_CLASS_P (lhs)
1670 && is_gimple_val (rhs))
1672 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1673 find_interesting_uses_op (data, rhs);
1674 return;
1677 /* TODO -- we should also handle address uses of type
1679 memory = call (whatever);
1683 call (memory). */
1686 if (TREE_CODE (stmt) == PHI_NODE
1687 && bb_for_stmt (stmt) == data->current_loop->header)
1689 lhs = PHI_RESULT (stmt);
1690 iv = get_iv (data, lhs);
1692 if (iv && !zero_p (iv->step))
1693 return;
1696 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1698 op = USE_FROM_PTR (use_p);
1700 if (TREE_CODE (op) != SSA_NAME)
1701 continue;
1703 iv = get_iv (data, op);
1704 if (!iv)
1705 continue;
1707 find_interesting_uses_op (data, op);
1711 /* Finds interesting uses of induction variables outside of loops
1712 on loop exit edge EXIT. */
1714 static void
1715 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1717 tree phi, def;
1719 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1721 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1722 find_interesting_uses_outer (data, def);
1726 /* Finds uses of the induction variables that are interesting. */
1728 static void
1729 find_interesting_uses (struct ivopts_data *data)
1731 basic_block bb;
1732 block_stmt_iterator bsi;
1733 tree phi;
1734 basic_block *body = get_loop_body (data->current_loop);
1735 unsigned i;
1736 struct version_info *info;
1737 edge e;
1739 if (dump_file && (dump_flags & TDF_DETAILS))
1740 fprintf (dump_file, "Uses:\n\n");
1742 for (i = 0; i < data->current_loop->num_nodes; i++)
1744 edge_iterator ei;
1745 bb = body[i];
1747 FOR_EACH_EDGE (e, ei, bb->succs)
1748 if (e->dest != EXIT_BLOCK_PTR
1749 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1750 find_interesting_uses_outside (data, e);
1752 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1753 find_interesting_uses_stmt (data, phi);
1754 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1755 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1758 if (dump_file && (dump_flags & TDF_DETAILS))
1760 bitmap_iterator bi;
1762 fprintf (dump_file, "\n");
1764 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766 info = ver_info (data, i);
1767 if (info->inv_id)
1769 fprintf (dump_file, " ");
1770 print_generic_expr (dump_file, info->name, TDF_SLIM);
1771 fprintf (dump_file, " is invariant (%d)%s\n",
1772 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1776 fprintf (dump_file, "\n");
1779 free (body);
1782 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1783 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1784 we are at the top-level of the processed address. */
1786 static tree
1787 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1788 unsigned HOST_WIDE_INT *offset)
1790 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1791 enum tree_code code;
1792 tree type, orig_type = TREE_TYPE (expr);
1793 unsigned HOST_WIDE_INT off0, off1, st;
1794 tree orig_expr = expr;
1796 STRIP_NOPS (expr);
1798 type = TREE_TYPE (expr);
1799 code = TREE_CODE (expr);
1800 *offset = 0;
1802 switch (code)
1804 case INTEGER_CST:
1805 if (!cst_and_fits_in_hwi (expr)
1806 || zero_p (expr))
1807 return orig_expr;
1809 *offset = int_cst_value (expr);
1810 return build_int_cst_type (orig_type, 0);
1812 case PLUS_EXPR:
1813 case MINUS_EXPR:
1814 op0 = TREE_OPERAND (expr, 0);
1815 op1 = TREE_OPERAND (expr, 1);
1817 op0 = strip_offset_1 (op0, false, false, &off0);
1818 op1 = strip_offset_1 (op1, false, false, &off1);
1820 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1821 if (op0 == TREE_OPERAND (expr, 0)
1822 && op1 == TREE_OPERAND (expr, 1))
1823 return orig_expr;
1825 if (zero_p (op1))
1826 expr = op0;
1827 else if (zero_p (op0))
1829 if (code == PLUS_EXPR)
1830 expr = op1;
1831 else
1832 expr = fold_build1 (NEGATE_EXPR, type, op1);
1834 else
1835 expr = fold_build2 (code, type, op0, op1);
1837 return fold_convert (orig_type, expr);
1839 case ARRAY_REF:
1840 if (!inside_addr)
1841 return orig_expr;
1843 step = array_ref_element_size (expr);
1844 if (!cst_and_fits_in_hwi (step))
1845 break;
1847 st = int_cst_value (step);
1848 op1 = TREE_OPERAND (expr, 1);
1849 op1 = strip_offset_1 (op1, false, false, &off1);
1850 *offset = off1 * st;
1852 if (top_compref
1853 && zero_p (op1))
1855 /* Strip the component reference completely. */
1856 op0 = TREE_OPERAND (expr, 0);
1857 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1858 *offset += off0;
1859 return op0;
1861 break;
1863 case COMPONENT_REF:
1864 if (!inside_addr)
1865 return orig_expr;
1867 tmp = component_ref_field_offset (expr);
1868 if (top_compref
1869 && cst_and_fits_in_hwi (tmp))
1871 /* Strip the component reference completely. */
1872 op0 = TREE_OPERAND (expr, 0);
1873 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1874 *offset = off0 + int_cst_value (tmp);
1875 return op0;
1877 break;
1879 case ADDR_EXPR:
1880 op0 = TREE_OPERAND (expr, 0);
1881 op0 = strip_offset_1 (op0, true, true, &off0);
1882 *offset += off0;
1884 if (op0 == TREE_OPERAND (expr, 0))
1885 return orig_expr;
1887 expr = build_fold_addr_expr (op0);
1888 return fold_convert (orig_type, expr);
1890 case INDIRECT_REF:
1891 inside_addr = false;
1892 break;
1894 default:
1895 return orig_expr;
1898 /* Default handling of expressions for that we want to recurse into
1899 the first operand. */
1900 op0 = TREE_OPERAND (expr, 0);
1901 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1902 *offset += off0;
1904 if (op0 == TREE_OPERAND (expr, 0)
1905 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1906 return orig_expr;
1908 expr = copy_node (expr);
1909 TREE_OPERAND (expr, 0) = op0;
1910 if (op1)
1911 TREE_OPERAND (expr, 1) = op1;
1913 /* Inside address, we might strip the top level component references,
1914 thus changing type of the expression. Handling of ADDR_EXPR
1915 will fix that. */
1916 expr = fold_convert (orig_type, expr);
1918 return expr;
1921 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1923 static tree
1924 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1926 return strip_offset_1 (expr, false, false, offset);
1929 /* Returns variant of TYPE that can be used as base for different uses.
1930 For integer types, we return unsigned variant of the type, which
1931 avoids problems with overflows. For pointer types, we return void *. */
1933 static tree
1934 generic_type_for (tree type)
1936 if (POINTER_TYPE_P (type))
1937 return ptr_type_node;
1939 if (TYPE_UNSIGNED (type))
1940 return type;
1942 return unsigned_type_for (type);
1945 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1946 the bitmap to that we should store it. */
1948 static struct ivopts_data *fd_ivopts_data;
1949 static tree
1950 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1952 bitmap *depends_on = data;
1953 struct version_info *info;
1955 if (TREE_CODE (*expr_p) != SSA_NAME)
1956 return NULL_TREE;
1957 info = name_info (fd_ivopts_data, *expr_p);
1959 if (!info->inv_id || info->has_nonlin_use)
1960 return NULL_TREE;
1962 if (!*depends_on)
1963 *depends_on = BITMAP_ALLOC (NULL);
1964 bitmap_set_bit (*depends_on, info->inv_id);
1966 return NULL_TREE;
1969 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1970 position to POS. If USE is not NULL, the candidate is set as related to
1971 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1972 replacement of the final value of the iv by a direct computation. */
1974 static struct iv_cand *
1975 add_candidate_1 (struct ivopts_data *data,
1976 tree base, tree step, bool important, enum iv_position pos,
1977 struct iv_use *use, tree incremented_at)
1979 unsigned i;
1980 struct iv_cand *cand = NULL;
1981 tree type, orig_type;
1983 if (base)
1985 orig_type = TREE_TYPE (base);
1986 type = generic_type_for (orig_type);
1987 if (type != orig_type)
1989 base = fold_convert (type, base);
1990 if (step)
1991 step = fold_convert (type, step);
1995 for (i = 0; i < n_iv_cands (data); i++)
1997 cand = iv_cand (data, i);
1999 if (cand->pos != pos)
2000 continue;
2002 if (cand->incremented_at != incremented_at)
2003 continue;
2005 if (!cand->iv)
2007 if (!base && !step)
2008 break;
2010 continue;
2013 if (!base && !step)
2014 continue;
2016 if (!operand_equal_p (base, cand->iv->base, 0))
2017 continue;
2019 if (zero_p (cand->iv->step))
2021 if (zero_p (step))
2022 break;
2024 else
2026 if (step && operand_equal_p (step, cand->iv->step, 0))
2027 break;
2031 if (i == n_iv_cands (data))
2033 cand = xcalloc (1, sizeof (struct iv_cand));
2034 cand->id = i;
2036 if (!base && !step)
2037 cand->iv = NULL;
2038 else
2039 cand->iv = alloc_iv (base, step);
2041 cand->pos = pos;
2042 if (pos != IP_ORIGINAL && cand->iv)
2044 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2045 cand->var_after = cand->var_before;
2047 cand->important = important;
2048 cand->incremented_at = incremented_at;
2049 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2051 if (step
2052 && TREE_CODE (step) != INTEGER_CST)
2054 fd_ivopts_data = data;
2055 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2058 if (dump_file && (dump_flags & TDF_DETAILS))
2059 dump_cand (dump_file, cand);
2062 if (important && !cand->important)
2064 cand->important = true;
2065 if (dump_file && (dump_flags & TDF_DETAILS))
2066 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2069 if (use)
2071 bitmap_set_bit (use->related_cands, i);
2072 if (dump_file && (dump_flags & TDF_DETAILS))
2073 fprintf (dump_file, "Candidate %d is related to use %d\n",
2074 cand->id, use->id);
2077 return cand;
2080 /* Returns true if incrementing the induction variable at the end of the LOOP
2081 is allowed.
2083 The purpose is to avoid splitting latch edge with a biv increment, thus
2084 creating a jump, possibly confusing other optimization passes and leaving
2085 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2086 is not available (so we do not have a better alternative), or if the latch
2087 edge is already nonempty. */
2089 static bool
2090 allow_ip_end_pos_p (struct loop *loop)
2092 if (!ip_normal_pos (loop))
2093 return true;
2095 if (!empty_block_p (ip_end_pos (loop)))
2096 return true;
2098 return false;
2101 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2102 position to POS. If USE is not NULL, the candidate is set as related to
2103 it. The candidate computation is scheduled on all available positions. */
2105 static void
2106 add_candidate (struct ivopts_data *data,
2107 tree base, tree step, bool important, struct iv_use *use)
2109 if (ip_normal_pos (data->current_loop))
2110 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2111 if (ip_end_pos (data->current_loop)
2112 && allow_ip_end_pos_p (data->current_loop))
2113 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2116 /* Add a standard "0 + 1 * iteration" iv candidate for a
2117 type with SIZE bits. */
2119 static void
2120 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2121 unsigned int size)
2123 tree type = lang_hooks.types.type_for_size (size, true);
2124 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2125 true, NULL);
2128 /* Adds standard iv candidates. */
2130 static void
2131 add_standard_iv_candidates (struct ivopts_data *data)
2133 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2135 /* The same for a double-integer type if it is still fast enough. */
2136 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2137 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2141 /* Adds candidates bases on the old induction variable IV. */
2143 static void
2144 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2146 tree phi, def;
2147 struct iv_cand *cand;
2149 add_candidate (data, iv->base, iv->step, true, NULL);
2151 /* The same, but with initial value zero. */
2152 add_candidate (data,
2153 build_int_cst (TREE_TYPE (iv->base), 0),
2154 iv->step, true, NULL);
2156 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2157 if (TREE_CODE (phi) == PHI_NODE)
2159 /* Additionally record the possibility of leaving the original iv
2160 untouched. */
2161 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2162 cand = add_candidate_1 (data,
2163 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2164 SSA_NAME_DEF_STMT (def));
2165 cand->var_before = iv->ssa_name;
2166 cand->var_after = def;
2170 /* Adds candidates based on the old induction variables. */
2172 static void
2173 add_old_ivs_candidates (struct ivopts_data *data)
2175 unsigned i;
2176 struct iv *iv;
2177 bitmap_iterator bi;
2179 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2181 iv = ver_info (data, i)->iv;
2182 if (iv && iv->biv_p && !zero_p (iv->step))
2183 add_old_iv_candidates (data, iv);
2187 /* Adds candidates based on the value of the induction variable IV and USE. */
2189 static void
2190 add_iv_value_candidates (struct ivopts_data *data,
2191 struct iv *iv, struct iv_use *use)
2193 unsigned HOST_WIDE_INT offset;
2194 tree base;
2196 add_candidate (data, iv->base, iv->step, false, use);
2198 /* The same, but with initial value zero. Make such variable important,
2199 since it is generic enough so that possibly many uses may be based
2200 on it. */
2201 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2202 iv->step, true, use);
2204 /* Third, try removing the constant offset. */
2205 base = strip_offset (iv->base, &offset);
2206 if (offset)
2207 add_candidate (data, base, iv->step, false, use);
2210 /* Possibly adds pseudocandidate for replacing the final value of USE by
2211 a direct computation. */
2213 static void
2214 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2216 /* We must know where we exit the loop and how many times does it roll. */
2217 if (! niter_for_single_dom_exit (data))
2218 return;
2220 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2223 /* Adds candidates based on the uses. */
2225 static void
2226 add_derived_ivs_candidates (struct ivopts_data *data)
2228 unsigned i;
2230 for (i = 0; i < n_iv_uses (data); i++)
2232 struct iv_use *use = iv_use (data, i);
2234 if (!use)
2235 continue;
2237 switch (use->type)
2239 case USE_NONLINEAR_EXPR:
2240 case USE_COMPARE:
2241 case USE_ADDRESS:
2242 /* Just add the ivs based on the value of the iv used here. */
2243 add_iv_value_candidates (data, use->iv, use);
2244 break;
2246 case USE_OUTER:
2247 add_iv_value_candidates (data, use->iv, use);
2249 /* Additionally, add the pseudocandidate for the possibility to
2250 replace the final value by a direct computation. */
2251 add_iv_outer_candidates (data, use);
2252 break;
2254 default:
2255 gcc_unreachable ();
2260 /* Record important candidates and add them to related_cands bitmaps
2261 if needed. */
2263 static void
2264 record_important_candidates (struct ivopts_data *data)
2266 unsigned i;
2267 struct iv_use *use;
2269 for (i = 0; i < n_iv_cands (data); i++)
2271 struct iv_cand *cand = iv_cand (data, i);
2273 if (cand->important)
2274 bitmap_set_bit (data->important_candidates, i);
2277 data->consider_all_candidates = (n_iv_cands (data)
2278 <= CONSIDER_ALL_CANDIDATES_BOUND);
2280 if (data->consider_all_candidates)
2282 /* We will not need "related_cands" bitmaps in this case,
2283 so release them to decrease peak memory consumption. */
2284 for (i = 0; i < n_iv_uses (data); i++)
2286 use = iv_use (data, i);
2287 BITMAP_FREE (use->related_cands);
2290 else
2292 /* Add important candidates to the related_cands bitmaps. */
2293 for (i = 0; i < n_iv_uses (data); i++)
2294 bitmap_ior_into (iv_use (data, i)->related_cands,
2295 data->important_candidates);
2299 /* Finds the candidates for the induction variables. */
2301 static void
2302 find_iv_candidates (struct ivopts_data *data)
2304 /* Add commonly used ivs. */
2305 add_standard_iv_candidates (data);
2307 /* Add old induction variables. */
2308 add_old_ivs_candidates (data);
2310 /* Add induction variables derived from uses. */
2311 add_derived_ivs_candidates (data);
2313 /* Record the important candidates. */
2314 record_important_candidates (data);
2317 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2318 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2319 we allocate a simple list to every use. */
2321 static void
2322 alloc_use_cost_map (struct ivopts_data *data)
2324 unsigned i, size, s, j;
2326 for (i = 0; i < n_iv_uses (data); i++)
2328 struct iv_use *use = iv_use (data, i);
2329 bitmap_iterator bi;
2331 if (data->consider_all_candidates)
2332 size = n_iv_cands (data);
2333 else
2335 s = 0;
2336 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2338 s++;
2341 /* Round up to the power of two, so that moduling by it is fast. */
2342 for (size = 1; size < s; size <<= 1)
2343 continue;
2346 use->n_map_members = size;
2347 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2351 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2352 on invariants DEPENDS_ON and that the value used in expressing it
2353 is VALUE.*/
2355 static void
2356 set_use_iv_cost (struct ivopts_data *data,
2357 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2358 bitmap depends_on, tree value)
2360 unsigned i, s;
2362 if (cost == INFTY)
2364 BITMAP_FREE (depends_on);
2365 return;
2368 if (data->consider_all_candidates)
2370 use->cost_map[cand->id].cand = cand;
2371 use->cost_map[cand->id].cost = cost;
2372 use->cost_map[cand->id].depends_on = depends_on;
2373 use->cost_map[cand->id].value = value;
2374 return;
2377 /* n_map_members is a power of two, so this computes modulo. */
2378 s = cand->id & (use->n_map_members - 1);
2379 for (i = s; i < use->n_map_members; i++)
2380 if (!use->cost_map[i].cand)
2381 goto found;
2382 for (i = 0; i < s; i++)
2383 if (!use->cost_map[i].cand)
2384 goto found;
2386 gcc_unreachable ();
2388 found:
2389 use->cost_map[i].cand = cand;
2390 use->cost_map[i].cost = cost;
2391 use->cost_map[i].depends_on = depends_on;
2392 use->cost_map[i].value = value;
2395 /* Gets cost of (USE, CANDIDATE) pair. */
2397 static struct cost_pair *
2398 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2399 struct iv_cand *cand)
2401 unsigned i, s;
2402 struct cost_pair *ret;
2404 if (!cand)
2405 return NULL;
2407 if (data->consider_all_candidates)
2409 ret = use->cost_map + cand->id;
2410 if (!ret->cand)
2411 return NULL;
2413 return ret;
2416 /* n_map_members is a power of two, so this computes modulo. */
2417 s = cand->id & (use->n_map_members - 1);
2418 for (i = s; i < use->n_map_members; i++)
2419 if (use->cost_map[i].cand == cand)
2420 return use->cost_map + i;
2422 for (i = 0; i < s; i++)
2423 if (use->cost_map[i].cand == cand)
2424 return use->cost_map + i;
2426 return NULL;
2429 /* Returns estimate on cost of computing SEQ. */
2431 static unsigned
2432 seq_cost (rtx seq)
2434 unsigned cost = 0;
2435 rtx set;
2437 for (; seq; seq = NEXT_INSN (seq))
2439 set = single_set (seq);
2440 if (set)
2441 cost += rtx_cost (set, SET);
2442 else
2443 cost++;
2446 return cost;
2449 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2450 static rtx
2451 produce_memory_decl_rtl (tree obj, int *regno)
2453 rtx x;
2455 gcc_assert (obj);
2456 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2458 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2459 x = gen_rtx_SYMBOL_REF (Pmode, name);
2461 else
2462 x = gen_raw_REG (Pmode, (*regno)++);
2464 return gen_rtx_MEM (DECL_MODE (obj), x);
2467 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2468 walk_tree. DATA contains the actual fake register number. */
2470 static tree
2471 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2473 tree obj = NULL_TREE;
2474 rtx x = NULL_RTX;
2475 int *regno = data;
2477 switch (TREE_CODE (*expr_p))
2479 case ADDR_EXPR:
2480 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2481 handled_component_p (*expr_p);
2482 expr_p = &TREE_OPERAND (*expr_p, 0))
2483 continue;
2484 obj = *expr_p;
2485 if (DECL_P (obj))
2486 x = produce_memory_decl_rtl (obj, regno);
2487 break;
2489 case SSA_NAME:
2490 *ws = 0;
2491 obj = SSA_NAME_VAR (*expr_p);
2492 if (!DECL_RTL_SET_P (obj))
2493 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2494 break;
2496 case VAR_DECL:
2497 case PARM_DECL:
2498 case RESULT_DECL:
2499 *ws = 0;
2500 obj = *expr_p;
2502 if (DECL_RTL_SET_P (obj))
2503 break;
2505 if (DECL_MODE (obj) == BLKmode)
2506 x = produce_memory_decl_rtl (obj, regno);
2507 else
2508 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2510 break;
2512 default:
2513 break;
2516 if (x)
2518 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2519 SET_DECL_RTL (obj, x);
2522 return NULL_TREE;
2525 /* Determines cost of the computation of EXPR. */
2527 static unsigned
2528 computation_cost (tree expr)
2530 rtx seq, rslt;
2531 tree type = TREE_TYPE (expr);
2532 unsigned cost;
2533 /* Avoid using hard regs in ways which may be unsupported. */
2534 int regno = LAST_VIRTUAL_REGISTER + 1;
2536 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2537 start_sequence ();
2538 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2539 seq = get_insns ();
2540 end_sequence ();
2542 cost = seq_cost (seq);
2543 if (MEM_P (rslt))
2544 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2546 return cost;
2549 /* Returns variable containing the value of candidate CAND at statement AT. */
2551 static tree
2552 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2554 if (stmt_after_increment (loop, cand, stmt))
2555 return cand->var_after;
2556 else
2557 return cand->var_before;
2560 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2561 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2564 tree_int_cst_sign_bit (tree t)
2566 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2567 unsigned HOST_WIDE_INT w;
2569 if (bitno < HOST_BITS_PER_WIDE_INT)
2570 w = TREE_INT_CST_LOW (t);
2571 else
2573 w = TREE_INT_CST_HIGH (t);
2574 bitno -= HOST_BITS_PER_WIDE_INT;
2577 return (w >> bitno) & 1;
2580 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2581 return cst. Otherwise return NULL_TREE. */
2583 static tree
2584 constant_multiple_of (tree type, tree top, tree bot)
2586 tree res, mby, p0, p1;
2587 enum tree_code code;
2588 bool negate;
2590 STRIP_NOPS (top);
2591 STRIP_NOPS (bot);
2593 if (operand_equal_p (top, bot, 0))
2594 return build_int_cst (type, 1);
2596 code = TREE_CODE (top);
2597 switch (code)
2599 case MULT_EXPR:
2600 mby = TREE_OPERAND (top, 1);
2601 if (TREE_CODE (mby) != INTEGER_CST)
2602 return NULL_TREE;
2604 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2605 if (!res)
2606 return NULL_TREE;
2608 return fold_binary_to_constant (MULT_EXPR, type, res,
2609 fold_convert (type, mby));
2611 case PLUS_EXPR:
2612 case MINUS_EXPR:
2613 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2614 if (!p0)
2615 return NULL_TREE;
2616 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2617 if (!p1)
2618 return NULL_TREE;
2620 return fold_binary_to_constant (code, type, p0, p1);
2622 case INTEGER_CST:
2623 if (TREE_CODE (bot) != INTEGER_CST)
2624 return NULL_TREE;
2626 bot = fold_convert (type, bot);
2627 top = fold_convert (type, top);
2629 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2630 the result afterwards. */
2631 if (tree_int_cst_sign_bit (bot))
2633 negate = true;
2634 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2636 else
2637 negate = false;
2639 /* Ditto for TOP. */
2640 if (tree_int_cst_sign_bit (top))
2642 negate = !negate;
2643 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2646 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2647 return NULL_TREE;
2649 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2650 if (negate)
2651 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2652 return res;
2654 default:
2655 return NULL_TREE;
2659 /* Sets COMB to CST. */
2661 static void
2662 aff_combination_const (struct affine_tree_combination *comb, tree type,
2663 unsigned HOST_WIDE_INT cst)
2665 unsigned prec = TYPE_PRECISION (type);
2667 comb->type = type;
2668 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2670 comb->n = 0;
2671 comb->rest = NULL_TREE;
2672 comb->offset = cst & comb->mask;
2675 /* Sets COMB to single element ELT. */
2677 static void
2678 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2680 unsigned prec = TYPE_PRECISION (type);
2682 comb->type = type;
2683 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2685 comb->n = 1;
2686 comb->elts[0] = elt;
2687 comb->coefs[0] = 1;
2688 comb->rest = NULL_TREE;
2689 comb->offset = 0;
2692 /* Scales COMB by SCALE. */
2694 static void
2695 aff_combination_scale (struct affine_tree_combination *comb,
2696 unsigned HOST_WIDE_INT scale)
2698 unsigned i, j;
2700 if (scale == 1)
2701 return;
2703 if (scale == 0)
2705 aff_combination_const (comb, comb->type, 0);
2706 return;
2709 comb->offset = (scale * comb->offset) & comb->mask;
2710 for (i = 0, j = 0; i < comb->n; i++)
2712 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2713 comb->elts[j] = comb->elts[i];
2714 if (comb->coefs[j] != 0)
2715 j++;
2717 comb->n = j;
2719 if (comb->rest)
2721 if (comb->n < MAX_AFF_ELTS)
2723 comb->coefs[comb->n] = scale;
2724 comb->elts[comb->n] = comb->rest;
2725 comb->rest = NULL_TREE;
2726 comb->n++;
2728 else
2729 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2730 build_int_cst_type (comb->type, scale));
2734 /* Adds ELT * SCALE to COMB. */
2736 static void
2737 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2738 unsigned HOST_WIDE_INT scale)
2740 unsigned i;
2742 if (scale == 0)
2743 return;
2745 for (i = 0; i < comb->n; i++)
2746 if (operand_equal_p (comb->elts[i], elt, 0))
2748 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2749 if (comb->coefs[i])
2750 return;
2752 comb->n--;
2753 comb->coefs[i] = comb->coefs[comb->n];
2754 comb->elts[i] = comb->elts[comb->n];
2756 if (comb->rest)
2758 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2759 comb->coefs[comb->n] = 1;
2760 comb->elts[comb->n] = comb->rest;
2761 comb->rest = NULL_TREE;
2762 comb->n++;
2764 return;
2766 if (comb->n < MAX_AFF_ELTS)
2768 comb->coefs[comb->n] = scale;
2769 comb->elts[comb->n] = elt;
2770 comb->n++;
2771 return;
2774 if (scale == 1)
2775 elt = fold_convert (comb->type, elt);
2776 else
2777 elt = fold_build2 (MULT_EXPR, comb->type,
2778 fold_convert (comb->type, elt),
2779 build_int_cst_type (comb->type, scale));
2781 if (comb->rest)
2782 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2783 else
2784 comb->rest = elt;
2787 /* Adds COMB2 to COMB1. */
2789 static void
2790 aff_combination_add (struct affine_tree_combination *comb1,
2791 struct affine_tree_combination *comb2)
2793 unsigned i;
2795 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2796 for (i = 0; i < comb2->n; i++)
2797 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2798 if (comb2->rest)
2799 aff_combination_add_elt (comb1, comb2->rest, 1);
2802 /* Splits EXPR into an affine combination of parts. */
2804 static void
2805 tree_to_aff_combination (tree expr, tree type,
2806 struct affine_tree_combination *comb)
2808 struct affine_tree_combination tmp;
2809 enum tree_code code;
2810 tree cst, core, toffset;
2811 HOST_WIDE_INT bitpos, bitsize;
2812 enum machine_mode mode;
2813 int unsignedp, volatilep;
2815 STRIP_NOPS (expr);
2817 code = TREE_CODE (expr);
2818 switch (code)
2820 case INTEGER_CST:
2821 aff_combination_const (comb, type, int_cst_value (expr));
2822 return;
2824 case PLUS_EXPR:
2825 case MINUS_EXPR:
2826 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2827 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2828 if (code == MINUS_EXPR)
2829 aff_combination_scale (&tmp, -1);
2830 aff_combination_add (comb, &tmp);
2831 return;
2833 case MULT_EXPR:
2834 cst = TREE_OPERAND (expr, 1);
2835 if (TREE_CODE (cst) != INTEGER_CST)
2836 break;
2837 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2838 aff_combination_scale (comb, int_cst_value (cst));
2839 return;
2841 case NEGATE_EXPR:
2842 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2843 aff_combination_scale (comb, -1);
2844 return;
2846 case ADDR_EXPR:
2847 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2848 &toffset, &mode, &unsignedp, &volatilep,
2849 false);
2850 if (bitpos % BITS_PER_UNIT != 0)
2851 break;
2852 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2853 core = build_fold_addr_expr (core);
2854 if (TREE_CODE (core) == ADDR_EXPR)
2855 aff_combination_add_elt (comb, core, 1);
2856 else
2858 tree_to_aff_combination (core, type, &tmp);
2859 aff_combination_add (comb, &tmp);
2861 if (toffset)
2863 tree_to_aff_combination (toffset, type, &tmp);
2864 aff_combination_add (comb, &tmp);
2866 return;
2868 default:
2869 break;
2872 aff_combination_elt (comb, type, expr);
2875 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2877 static tree
2878 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2879 unsigned HOST_WIDE_INT mask)
2881 enum tree_code code;
2883 scale &= mask;
2884 elt = fold_convert (type, elt);
2886 if (scale == 1)
2888 if (!expr)
2889 return elt;
2891 return fold_build2 (PLUS_EXPR, type, expr, elt);
2894 if (scale == mask)
2896 if (!expr)
2897 return fold_build1 (NEGATE_EXPR, type, elt);
2899 return fold_build2 (MINUS_EXPR, type, expr, elt);
2902 if (!expr)
2903 return fold_build2 (MULT_EXPR, type, elt,
2904 build_int_cst_type (type, scale));
2906 if ((scale | (mask >> 1)) == mask)
2908 /* Scale is negative. */
2909 code = MINUS_EXPR;
2910 scale = (-scale) & mask;
2912 else
2913 code = PLUS_EXPR;
2915 elt = fold_build2 (MULT_EXPR, type, elt,
2916 build_int_cst_type (type, scale));
2917 return fold_build2 (code, type, expr, elt);
2920 /* Copies the tree elements of COMB to ensure that they are not shared. */
2922 static void
2923 unshare_aff_combination (struct affine_tree_combination *comb)
2925 unsigned i;
2927 for (i = 0; i < comb->n; i++)
2928 comb->elts[i] = unshare_expr (comb->elts[i]);
2929 if (comb->rest)
2930 comb->rest = unshare_expr (comb->rest);
2933 /* Makes tree from the affine combination COMB. */
2935 static tree
2936 aff_combination_to_tree (struct affine_tree_combination *comb)
2938 tree type = comb->type;
2939 tree expr = comb->rest;
2940 unsigned i;
2941 unsigned HOST_WIDE_INT off, sgn;
2943 /* Handle the special case produced by get_computation_aff when
2944 the type does not fit in HOST_WIDE_INT. */
2945 if (comb->n == 0 && comb->offset == 0)
2946 return fold_convert (type, expr);
2948 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2950 for (i = 0; i < comb->n; i++)
2951 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2952 comb->mask);
2954 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2956 /* Offset is negative. */
2957 off = (-comb->offset) & comb->mask;
2958 sgn = comb->mask;
2960 else
2962 off = comb->offset;
2963 sgn = 1;
2965 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2966 comb->mask);
2969 /* Determines the expression by that USE is expressed from induction variable
2970 CAND at statement AT in LOOP. The expression is stored in a decomposed
2971 form into AFF. Returns false if USE cannot be expressed using CAND. */
2973 static bool
2974 get_computation_aff (struct loop *loop,
2975 struct iv_use *use, struct iv_cand *cand, tree at,
2976 struct affine_tree_combination *aff)
2978 tree ubase = use->iv->base;
2979 tree ustep = use->iv->step;
2980 tree cbase = cand->iv->base;
2981 tree cstep = cand->iv->step;
2982 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2983 tree uutype;
2984 tree expr, delta;
2985 tree ratio;
2986 unsigned HOST_WIDE_INT ustepi, cstepi;
2987 HOST_WIDE_INT ratioi;
2988 struct affine_tree_combination cbase_aff, expr_aff;
2989 tree cstep_orig = cstep, ustep_orig = ustep;
2991 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2993 /* We do not have a precision to express the values of use. */
2994 return false;
2997 expr = var_at_stmt (loop, cand, at);
2999 if (TREE_TYPE (expr) != ctype)
3001 /* This may happen with the original ivs. */
3002 expr = fold_convert (ctype, expr);
3005 if (TYPE_UNSIGNED (utype))
3006 uutype = utype;
3007 else
3009 uutype = unsigned_type_for (utype);
3010 ubase = fold_convert (uutype, ubase);
3011 ustep = fold_convert (uutype, ustep);
3014 if (uutype != ctype)
3016 expr = fold_convert (uutype, expr);
3017 cbase = fold_convert (uutype, cbase);
3018 cstep = fold_convert (uutype, cstep);
3020 /* If the conversion is not noop, we must take it into account when
3021 considering the value of the step. */
3022 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3023 cstep_orig = cstep;
3026 if (cst_and_fits_in_hwi (cstep_orig)
3027 && cst_and_fits_in_hwi (ustep_orig))
3029 ustepi = int_cst_value (ustep_orig);
3030 cstepi = int_cst_value (cstep_orig);
3032 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3034 /* TODO maybe consider case when ustep divides cstep and the ratio is
3035 a power of 2 (so that the division is fast to execute)? We would
3036 need to be much more careful with overflows etc. then. */
3037 return false;
3040 ratio = build_int_cst_type (uutype, ratioi);
3042 else
3044 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3045 if (!ratio)
3046 return false;
3048 /* Ratioi is only used to detect special cases when the multiplicative
3049 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3050 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3051 to integer_onep/integer_all_onesp, since the former ignores
3052 TREE_OVERFLOW. */
3053 if (cst_and_fits_in_hwi (ratio))
3054 ratioi = int_cst_value (ratio);
3055 else if (integer_onep (ratio))
3056 ratioi = 1;
3057 else if (integer_all_onesp (ratio))
3058 ratioi = -1;
3059 else
3060 ratioi = 0;
3063 /* We may need to shift the value if we are after the increment. */
3064 if (stmt_after_increment (loop, cand, at))
3065 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3067 /* use = ubase - ratio * cbase + ratio * var.
3069 In general case ubase + ratio * (var - cbase) could be better (one less
3070 multiplication), but often it is possible to eliminate redundant parts
3071 of computations from (ubase - ratio * cbase) term, and if it does not
3072 happen, fold is able to apply the distributive law to obtain this form
3073 anyway. */
3075 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3077 /* Let's compute in trees and just return the result in AFF. This case
3078 should not be very common, and fold itself is not that bad either,
3079 so making the aff. functions more complicated to handle this case
3080 is not that urgent. */
3081 if (ratioi == 1)
3083 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3084 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3086 else if (ratioi == -1)
3088 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3089 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3091 else
3093 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3094 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3095 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3096 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3099 aff->type = uutype;
3100 aff->n = 0;
3101 aff->offset = 0;
3102 aff->mask = 0;
3103 aff->rest = expr;
3104 return true;
3107 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3108 possible to compute ratioi. */
3109 gcc_assert (ratioi);
3111 tree_to_aff_combination (ubase, uutype, aff);
3112 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3113 tree_to_aff_combination (expr, uutype, &expr_aff);
3114 aff_combination_scale (&cbase_aff, -ratioi);
3115 aff_combination_scale (&expr_aff, ratioi);
3116 aff_combination_add (aff, &cbase_aff);
3117 aff_combination_add (aff, &expr_aff);
3119 return true;
3122 /* Determines the expression by that USE is expressed from induction variable
3123 CAND at statement AT in LOOP. The computation is unshared. */
3125 static tree
3126 get_computation_at (struct loop *loop,
3127 struct iv_use *use, struct iv_cand *cand, tree at)
3129 struct affine_tree_combination aff;
3130 tree type = TREE_TYPE (use->iv->base);
3132 if (!get_computation_aff (loop, use, cand, at, &aff))
3133 return NULL_TREE;
3134 unshare_aff_combination (&aff);
3135 return fold_convert (type, aff_combination_to_tree (&aff));
3138 /* Determines the expression by that USE is expressed from induction variable
3139 CAND in LOOP. The computation is unshared. */
3141 static tree
3142 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3144 return get_computation_at (loop, use, cand, use->stmt);
3147 /* Returns cost of addition in MODE. */
3149 static unsigned
3150 add_cost (enum machine_mode mode)
3152 static unsigned costs[NUM_MACHINE_MODES];
3153 rtx seq;
3154 unsigned cost;
3156 if (costs[mode])
3157 return costs[mode];
3159 start_sequence ();
3160 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3161 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3162 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3163 NULL_RTX);
3164 seq = get_insns ();
3165 end_sequence ();
3167 cost = seq_cost (seq);
3168 if (!cost)
3169 cost = 1;
3171 costs[mode] = cost;
3173 if (dump_file && (dump_flags & TDF_DETAILS))
3174 fprintf (dump_file, "Addition in %s costs %d\n",
3175 GET_MODE_NAME (mode), cost);
3176 return cost;
3179 /* Entry in a hashtable of already known costs for multiplication. */
3180 struct mbc_entry
3182 HOST_WIDE_INT cst; /* The constant to multiply by. */
3183 enum machine_mode mode; /* In mode. */
3184 unsigned cost; /* The cost. */
3187 /* Counts hash value for the ENTRY. */
3189 static hashval_t
3190 mbc_entry_hash (const void *entry)
3192 const struct mbc_entry *e = entry;
3194 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3197 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3199 static int
3200 mbc_entry_eq (const void *entry1, const void *entry2)
3202 const struct mbc_entry *e1 = entry1;
3203 const struct mbc_entry *e2 = entry2;
3205 return (e1->mode == e2->mode
3206 && e1->cst == e2->cst);
3209 /* Returns cost of multiplication by constant CST in MODE. */
3211 unsigned
3212 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3214 static htab_t costs;
3215 struct mbc_entry **cached, act;
3216 rtx seq;
3217 unsigned cost;
3219 if (!costs)
3220 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3222 act.mode = mode;
3223 act.cst = cst;
3224 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3225 if (*cached)
3226 return (*cached)->cost;
3228 *cached = xmalloc (sizeof (struct mbc_entry));
3229 (*cached)->mode = mode;
3230 (*cached)->cst = cst;
3232 start_sequence ();
3233 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3234 gen_int_mode (cst, mode), NULL_RTX, 0);
3235 seq = get_insns ();
3236 end_sequence ();
3238 cost = seq_cost (seq);
3240 if (dump_file && (dump_flags & TDF_DETAILS))
3241 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3242 (int) cst, GET_MODE_NAME (mode), cost);
3244 (*cached)->cost = cost;
3246 return cost;
3249 /* Returns true if multiplying by RATIO is allowed in address. */
3251 bool
3252 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3254 #define MAX_RATIO 128
3255 static sbitmap valid_mult;
3257 if (!valid_mult)
3259 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3260 rtx addr;
3261 HOST_WIDE_INT i;
3263 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3264 sbitmap_zero (valid_mult);
3265 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3266 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3268 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3269 if (memory_address_p (Pmode, addr))
3270 SET_BIT (valid_mult, i + MAX_RATIO);
3273 if (dump_file && (dump_flags & TDF_DETAILS))
3275 fprintf (dump_file, " allowed multipliers:");
3276 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3277 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3278 fprintf (dump_file, " %d", (int) i);
3279 fprintf (dump_file, "\n");
3280 fprintf (dump_file, "\n");
3284 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3285 return false;
3287 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3290 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3291 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3292 variable is omitted. The created memory accesses MODE.
3294 TODO -- there must be some better way. This all is quite crude. */
3296 static unsigned
3297 get_address_cost (bool symbol_present, bool var_present,
3298 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3300 static bool initialized = false;
3301 static HOST_WIDE_INT rat, off;
3302 static HOST_WIDE_INT min_offset, max_offset;
3303 static unsigned costs[2][2][2][2];
3304 unsigned cost, acost;
3305 rtx seq, addr, base;
3306 bool offset_p, ratio_p;
3307 rtx reg1;
3308 HOST_WIDE_INT s_offset;
3309 unsigned HOST_WIDE_INT mask;
3310 unsigned bits;
3312 if (!initialized)
3314 HOST_WIDE_INT i;
3315 initialized = true;
3317 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3319 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3320 for (i = 1; i <= 1 << 20; i <<= 1)
3322 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3323 if (!memory_address_p (Pmode, addr))
3324 break;
3326 max_offset = i >> 1;
3327 off = max_offset;
3329 for (i = 1; i <= 1 << 20; i <<= 1)
3331 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3332 if (!memory_address_p (Pmode, addr))
3333 break;
3335 min_offset = -(i >> 1);
3337 if (dump_file && (dump_flags & TDF_DETAILS))
3339 fprintf (dump_file, "get_address_cost:\n");
3340 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3341 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3344 rat = 1;
3345 for (i = 2; i <= MAX_RATIO; i++)
3346 if (multiplier_allowed_in_address_p (i))
3348 rat = i;
3349 break;
3353 bits = GET_MODE_BITSIZE (Pmode);
3354 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3355 offset &= mask;
3356 if ((offset >> (bits - 1) & 1))
3357 offset |= ~mask;
3358 s_offset = offset;
3360 cost = 0;
3361 offset_p = (s_offset != 0
3362 && min_offset <= s_offset && s_offset <= max_offset);
3363 ratio_p = (ratio != 1
3364 && multiplier_allowed_in_address_p (ratio));
3366 if (ratio != 1 && !ratio_p)
3367 cost += multiply_by_cost (ratio, Pmode);
3369 if (s_offset && !offset_p && !symbol_present)
3371 cost += add_cost (Pmode);
3372 var_present = true;
3375 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3376 if (!acost)
3378 int old_cse_not_expected;
3379 acost = 0;
3381 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3382 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3383 if (ratio_p)
3384 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3386 if (var_present)
3387 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3389 if (symbol_present)
3391 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3392 if (offset_p)
3393 base = gen_rtx_fmt_e (CONST, Pmode,
3394 gen_rtx_fmt_ee (PLUS, Pmode,
3395 base,
3396 gen_int_mode (off, Pmode)));
3398 else if (offset_p)
3399 base = gen_int_mode (off, Pmode);
3400 else
3401 base = NULL_RTX;
3403 if (base)
3404 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3406 start_sequence ();
3407 /* To avoid splitting addressing modes, pretend that no cse will
3408 follow. */
3409 old_cse_not_expected = cse_not_expected;
3410 cse_not_expected = true;
3411 addr = memory_address (Pmode, addr);
3412 cse_not_expected = old_cse_not_expected;
3413 seq = get_insns ();
3414 end_sequence ();
3416 acost = seq_cost (seq);
3417 acost += address_cost (addr, Pmode);
3419 if (!acost)
3420 acost = 1;
3421 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3424 return cost + acost;
3427 /* Estimates cost of forcing expression EXPR into a variable. */
3429 unsigned
3430 force_expr_to_var_cost (tree expr)
3432 static bool costs_initialized = false;
3433 static unsigned integer_cost;
3434 static unsigned symbol_cost;
3435 static unsigned address_cost;
3436 tree op0, op1;
3437 unsigned cost0, cost1, cost;
3438 enum machine_mode mode;
3440 if (!costs_initialized)
3442 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3443 rtx x = gen_rtx_MEM (DECL_MODE (var),
3444 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3445 tree addr;
3446 tree type = build_pointer_type (integer_type_node);
3448 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3449 2000));
3451 SET_DECL_RTL (var, x);
3452 TREE_STATIC (var) = 1;
3453 addr = build1 (ADDR_EXPR, type, var);
3454 symbol_cost = computation_cost (addr) + 1;
3456 address_cost
3457 = computation_cost (build2 (PLUS_EXPR, type,
3458 addr,
3459 build_int_cst_type (type, 2000))) + 1;
3460 if (dump_file && (dump_flags & TDF_DETAILS))
3462 fprintf (dump_file, "force_expr_to_var_cost:\n");
3463 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3464 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3465 fprintf (dump_file, " address %d\n", (int) address_cost);
3466 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3467 fprintf (dump_file, "\n");
3470 costs_initialized = true;
3473 STRIP_NOPS (expr);
3475 if (SSA_VAR_P (expr))
3476 return 0;
3478 if (TREE_INVARIANT (expr))
3480 if (TREE_CODE (expr) == INTEGER_CST)
3481 return integer_cost;
3483 if (TREE_CODE (expr) == ADDR_EXPR)
3485 tree obj = TREE_OPERAND (expr, 0);
3487 if (TREE_CODE (obj) == VAR_DECL
3488 || TREE_CODE (obj) == PARM_DECL
3489 || TREE_CODE (obj) == RESULT_DECL)
3490 return symbol_cost;
3493 return address_cost;
3496 switch (TREE_CODE (expr))
3498 case PLUS_EXPR:
3499 case MINUS_EXPR:
3500 case MULT_EXPR:
3501 op0 = TREE_OPERAND (expr, 0);
3502 op1 = TREE_OPERAND (expr, 1);
3503 STRIP_NOPS (op0);
3504 STRIP_NOPS (op1);
3506 if (is_gimple_val (op0))
3507 cost0 = 0;
3508 else
3509 cost0 = force_expr_to_var_cost (op0);
3511 if (is_gimple_val (op1))
3512 cost1 = 0;
3513 else
3514 cost1 = force_expr_to_var_cost (op1);
3516 break;
3518 default:
3519 /* Just an arbitrary value, FIXME. */
3520 return target_spill_cost;
3523 mode = TYPE_MODE (TREE_TYPE (expr));
3524 switch (TREE_CODE (expr))
3526 case PLUS_EXPR:
3527 case MINUS_EXPR:
3528 cost = add_cost (mode);
3529 break;
3531 case MULT_EXPR:
3532 if (cst_and_fits_in_hwi (op0))
3533 cost = multiply_by_cost (int_cst_value (op0), mode);
3534 else if (cst_and_fits_in_hwi (op1))
3535 cost = multiply_by_cost (int_cst_value (op1), mode);
3536 else
3537 return target_spill_cost;
3538 break;
3540 default:
3541 gcc_unreachable ();
3544 cost += cost0;
3545 cost += cost1;
3547 /* Bound the cost by target_spill_cost. The parts of complicated
3548 computations often are either loop invariant or at least can
3549 be shared between several iv uses, so letting this grow without
3550 limits would not give reasonable results. */
3551 return cost < target_spill_cost ? cost : target_spill_cost;
3554 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3555 invariants the computation depends on. */
3557 static unsigned
3558 force_var_cost (struct ivopts_data *data,
3559 tree expr, bitmap *depends_on)
3561 if (depends_on)
3563 fd_ivopts_data = data;
3564 walk_tree (&expr, find_depends, depends_on, NULL);
3567 return force_expr_to_var_cost (expr);
3570 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3571 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3572 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3573 invariants the computation depends on. */
3575 static unsigned
3576 split_address_cost (struct ivopts_data *data,
3577 tree addr, bool *symbol_present, bool *var_present,
3578 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3580 tree core;
3581 HOST_WIDE_INT bitsize;
3582 HOST_WIDE_INT bitpos;
3583 tree toffset;
3584 enum machine_mode mode;
3585 int unsignedp, volatilep;
3587 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3588 &unsignedp, &volatilep, false);
3590 if (toffset != 0
3591 || bitpos % BITS_PER_UNIT != 0
3592 || TREE_CODE (core) != VAR_DECL)
3594 *symbol_present = false;
3595 *var_present = true;
3596 fd_ivopts_data = data;
3597 walk_tree (&addr, find_depends, depends_on, NULL);
3598 return target_spill_cost;
3601 *offset += bitpos / BITS_PER_UNIT;
3602 if (TREE_STATIC (core)
3603 || DECL_EXTERNAL (core))
3605 *symbol_present = true;
3606 *var_present = false;
3607 return 0;
3610 *symbol_present = false;
3611 *var_present = true;
3612 return 0;
3615 /* Estimates cost of expressing difference of addresses E1 - E2 as
3616 var + symbol + offset. The value of offset is added to OFFSET,
3617 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3618 part is missing. DEPENDS_ON is a set of the invariants the computation
3619 depends on. */
3621 static unsigned
3622 ptr_difference_cost (struct ivopts_data *data,
3623 tree e1, tree e2, bool *symbol_present, bool *var_present,
3624 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3626 HOST_WIDE_INT diff = 0;
3627 unsigned cost;
3629 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3631 if (ptr_difference_const (e1, e2, &diff))
3633 *offset += diff;
3634 *symbol_present = false;
3635 *var_present = false;
3636 return 0;
3639 if (e2 == integer_zero_node)
3640 return split_address_cost (data, TREE_OPERAND (e1, 0),
3641 symbol_present, var_present, offset, depends_on);
3643 *symbol_present = false;
3644 *var_present = true;
3646 cost = force_var_cost (data, e1, depends_on);
3647 cost += force_var_cost (data, e2, depends_on);
3648 cost += add_cost (Pmode);
3650 return cost;
3653 /* Estimates cost of expressing difference E1 - E2 as
3654 var + symbol + offset. The value of offset is added to OFFSET,
3655 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3656 part is missing. DEPENDS_ON is a set of the invariants the computation
3657 depends on. */
3659 static unsigned
3660 difference_cost (struct ivopts_data *data,
3661 tree e1, tree e2, bool *symbol_present, bool *var_present,
3662 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3664 unsigned cost;
3665 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3666 unsigned HOST_WIDE_INT off1, off2;
3668 e1 = strip_offset (e1, &off1);
3669 e2 = strip_offset (e2, &off2);
3670 *offset += off1 - off2;
3672 STRIP_NOPS (e1);
3673 STRIP_NOPS (e2);
3675 if (TREE_CODE (e1) == ADDR_EXPR)
3676 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3677 depends_on);
3678 *symbol_present = false;
3680 if (operand_equal_p (e1, e2, 0))
3682 *var_present = false;
3683 return 0;
3685 *var_present = true;
3686 if (zero_p (e2))
3687 return force_var_cost (data, e1, depends_on);
3689 if (zero_p (e1))
3691 cost = force_var_cost (data, e2, depends_on);
3692 cost += multiply_by_cost (-1, mode);
3694 return cost;
3697 cost = force_var_cost (data, e1, depends_on);
3698 cost += force_var_cost (data, e2, depends_on);
3699 cost += add_cost (mode);
3701 return cost;
3704 /* Determines the cost of the computation by that USE is expressed
3705 from induction variable CAND. If ADDRESS_P is true, we just need
3706 to create an address from it, otherwise we want to get it into
3707 register. A set of invariants we depend on is stored in
3708 DEPENDS_ON. AT is the statement at that the value is computed. */
3710 static unsigned
3711 get_computation_cost_at (struct ivopts_data *data,
3712 struct iv_use *use, struct iv_cand *cand,
3713 bool address_p, bitmap *depends_on, tree at)
3715 tree ubase = use->iv->base, ustep = use->iv->step;
3716 tree cbase, cstep;
3717 tree utype = TREE_TYPE (ubase), ctype;
3718 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3719 HOST_WIDE_INT ratio, aratio;
3720 bool var_present, symbol_present;
3721 unsigned cost = 0, n_sums;
3723 *depends_on = NULL;
3725 /* Only consider real candidates. */
3726 if (!cand->iv)
3727 return INFTY;
3729 cbase = cand->iv->base;
3730 cstep = cand->iv->step;
3731 ctype = TREE_TYPE (cbase);
3733 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3735 /* We do not have a precision to express the values of use. */
3736 return INFTY;
3739 if (address_p)
3741 /* Do not try to express address of an object with computation based
3742 on address of a different object. This may cause problems in rtl
3743 level alias analysis (that does not expect this to be happening,
3744 as this is illegal in C), and would be unlikely to be useful
3745 anyway. */
3746 if (use->iv->base_object
3747 && cand->iv->base_object
3748 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3749 return INFTY;
3752 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3754 /* TODO -- add direct handling of this case. */
3755 goto fallback;
3758 /* CSTEPI is removed from the offset in case statement is after the
3759 increment. If the step is not constant, we use zero instead.
3760 This is a bit imprecise (there is the extra addition), but
3761 redundancy elimination is likely to transform the code so that
3762 it uses value of the variable before increment anyway,
3763 so it is not that much unrealistic. */
3764 if (cst_and_fits_in_hwi (cstep))
3765 cstepi = int_cst_value (cstep);
3766 else
3767 cstepi = 0;
3769 if (cst_and_fits_in_hwi (ustep)
3770 && cst_and_fits_in_hwi (cstep))
3772 ustepi = int_cst_value (ustep);
3774 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3775 return INFTY;
3777 else
3779 tree rat;
3781 rat = constant_multiple_of (utype, ustep, cstep);
3783 if (!rat)
3784 return INFTY;
3786 if (cst_and_fits_in_hwi (rat))
3787 ratio = int_cst_value (rat);
3788 else if (integer_onep (rat))
3789 ratio = 1;
3790 else if (integer_all_onesp (rat))
3791 ratio = -1;
3792 else
3793 return INFTY;
3796 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3797 or ratio == 1, it is better to handle this like
3799 ubase - ratio * cbase + ratio * var
3801 (also holds in the case ratio == -1, TODO. */
3803 if (cst_and_fits_in_hwi (cbase))
3805 offset = - ratio * int_cst_value (cbase);
3806 cost += difference_cost (data,
3807 ubase, integer_zero_node,
3808 &symbol_present, &var_present, &offset,
3809 depends_on);
3811 else if (ratio == 1)
3813 cost += difference_cost (data,
3814 ubase, cbase,
3815 &symbol_present, &var_present, &offset,
3816 depends_on);
3818 else
3820 cost += force_var_cost (data, cbase, depends_on);
3821 cost += add_cost (TYPE_MODE (ctype));
3822 cost += difference_cost (data,
3823 ubase, integer_zero_node,
3824 &symbol_present, &var_present, &offset,
3825 depends_on);
3828 /* If we are after the increment, the value of the candidate is higher by
3829 one iteration. */
3830 if (stmt_after_increment (data->current_loop, cand, at))
3831 offset -= ratio * cstepi;
3833 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3834 (symbol/var/const parts may be omitted). If we are looking for an address,
3835 find the cost of addressing this. */
3836 if (address_p)
3837 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3839 /* Otherwise estimate the costs for computing the expression. */
3840 aratio = ratio > 0 ? ratio : -ratio;
3841 if (!symbol_present && !var_present && !offset)
3843 if (ratio != 1)
3844 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3846 return cost;
3849 if (aratio != 1)
3850 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3852 n_sums = 1;
3853 if (var_present
3854 /* Symbol + offset should be compile-time computable. */
3855 && (symbol_present || offset))
3856 n_sums++;
3858 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3860 fallback:
3862 /* Just get the expression, expand it and measure the cost. */
3863 tree comp = get_computation_at (data->current_loop, use, cand, at);
3865 if (!comp)
3866 return INFTY;
3868 if (address_p)
3869 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3871 return computation_cost (comp);
3875 /* Determines the cost of the computation by that USE is expressed
3876 from induction variable CAND. If ADDRESS_P is true, we just need
3877 to create an address from it, otherwise we want to get it into
3878 register. A set of invariants we depend on is stored in
3879 DEPENDS_ON. */
3881 static unsigned
3882 get_computation_cost (struct ivopts_data *data,
3883 struct iv_use *use, struct iv_cand *cand,
3884 bool address_p, bitmap *depends_on)
3886 return get_computation_cost_at (data,
3887 use, cand, address_p, depends_on, use->stmt);
3890 /* Determines cost of basing replacement of USE on CAND in a generic
3891 expression. */
3893 static bool
3894 determine_use_iv_cost_generic (struct ivopts_data *data,
3895 struct iv_use *use, struct iv_cand *cand)
3897 bitmap depends_on;
3898 unsigned cost;
3900 /* The simple case first -- if we need to express value of the preserved
3901 original biv, the cost is 0. This also prevents us from counting the
3902 cost of increment twice -- once at this use and once in the cost of
3903 the candidate. */
3904 if (cand->pos == IP_ORIGINAL
3905 && cand->incremented_at == use->stmt)
3907 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3908 return true;
3911 cost = get_computation_cost (data, use, cand, false, &depends_on);
3912 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3914 return cost != INFTY;
3917 /* Determines cost of basing replacement of USE on CAND in an address. */
3919 static bool
3920 determine_use_iv_cost_address (struct ivopts_data *data,
3921 struct iv_use *use, struct iv_cand *cand)
3923 bitmap depends_on;
3924 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3926 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3928 return cost != INFTY;
3931 /* Computes value of induction variable IV in iteration NITER. */
3933 static tree
3934 iv_value (struct iv *iv, tree niter)
3936 tree val;
3937 tree type = TREE_TYPE (iv->base);
3939 niter = fold_convert (type, niter);
3940 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3942 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3945 /* Computes value of candidate CAND at position AT in iteration NITER. */
3947 static tree
3948 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3950 tree val = iv_value (cand->iv, niter);
3951 tree type = TREE_TYPE (cand->iv->base);
3953 if (stmt_after_increment (loop, cand, at))
3954 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3956 return val;
3959 /* Returns period of induction variable iv. */
3961 static tree
3962 iv_period (struct iv *iv)
3964 tree step = iv->step, period, type;
3965 tree pow2div;
3967 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3969 /* Period of the iv is gcd (step, type range). Since type range is power
3970 of two, it suffices to determine the maximum power of two that divides
3971 step. */
3972 pow2div = num_ending_zeros (step);
3973 type = unsigned_type_for (TREE_TYPE (step));
3975 period = build_low_bits_mask (type,
3976 (TYPE_PRECISION (type)
3977 - tree_low_cst (pow2div, 1)));
3979 return period;
3982 /* Returns the comparison operator used when eliminating the iv USE. */
3984 static enum tree_code
3985 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3987 struct loop *loop = data->current_loop;
3988 basic_block ex_bb;
3989 edge exit;
3991 ex_bb = bb_for_stmt (use->stmt);
3992 exit = EDGE_SUCC (ex_bb, 0);
3993 if (flow_bb_inside_loop_p (loop, exit->dest))
3994 exit = EDGE_SUCC (ex_bb, 1);
3996 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3999 /* Check whether it is possible to express the condition in USE by comparison
4000 of candidate CAND. If so, store the value compared with to BOUND. */
4002 static bool
4003 may_eliminate_iv (struct ivopts_data *data,
4004 struct iv_use *use, struct iv_cand *cand, tree *bound)
4006 basic_block ex_bb;
4007 edge exit;
4008 tree nit, nit_type;
4009 tree wider_type, period, per_type;
4010 struct loop *loop = data->current_loop;
4012 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4013 return false;
4015 /* For now works only for exits that dominate the loop latch. TODO -- extend
4016 for other conditions inside loop body. */
4017 ex_bb = bb_for_stmt (use->stmt);
4018 if (use->stmt != last_stmt (ex_bb)
4019 || TREE_CODE (use->stmt) != COND_EXPR)
4020 return false;
4021 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4022 return false;
4024 exit = EDGE_SUCC (ex_bb, 0);
4025 if (flow_bb_inside_loop_p (loop, exit->dest))
4026 exit = EDGE_SUCC (ex_bb, 1);
4027 if (flow_bb_inside_loop_p (loop, exit->dest))
4028 return false;
4030 nit = niter_for_exit (data, exit);
4031 if (!nit)
4032 return false;
4034 nit_type = TREE_TYPE (nit);
4036 /* Determine whether we may use the variable to test whether niter iterations
4037 elapsed. This is the case iff the period of the induction variable is
4038 greater than the number of iterations. */
4039 period = iv_period (cand->iv);
4040 if (!period)
4041 return false;
4042 per_type = TREE_TYPE (period);
4044 wider_type = TREE_TYPE (period);
4045 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4046 wider_type = per_type;
4047 else
4048 wider_type = nit_type;
4050 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4051 fold_convert (wider_type, period),
4052 fold_convert (wider_type, nit))))
4053 return false;
4055 *bound = cand_value_at (loop, cand, use->stmt, nit);
4056 return true;
4059 /* Determines cost of basing replacement of USE on CAND in a condition. */
4061 static bool
4062 determine_use_iv_cost_condition (struct ivopts_data *data,
4063 struct iv_use *use, struct iv_cand *cand)
4065 tree bound = NULL_TREE, op, cond;
4066 bitmap depends_on = NULL;
4067 unsigned cost;
4069 /* Only consider real candidates. */
4070 if (!cand->iv)
4072 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4073 return false;
4076 if (may_eliminate_iv (data, use, cand, &bound))
4078 cost = force_var_cost (data, bound, &depends_on);
4080 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4081 return cost != INFTY;
4084 /* The induction variable elimination failed; just express the original
4085 giv. If it is compared with an invariant, note that we cannot get
4086 rid of it. */
4087 cost = get_computation_cost (data, use, cand, false, &depends_on);
4089 cond = *use->op_p;
4090 if (TREE_CODE (cond) != SSA_NAME)
4092 op = TREE_OPERAND (cond, 0);
4093 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4094 op = TREE_OPERAND (cond, 1);
4095 if (TREE_CODE (op) == SSA_NAME)
4097 op = get_iv (data, op)->base;
4098 fd_ivopts_data = data;
4099 walk_tree (&op, find_depends, &depends_on, NULL);
4103 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4104 return cost != INFTY;
4107 /* Checks whether it is possible to replace the final value of USE by
4108 a direct computation. If so, the formula is stored to *VALUE. */
4110 static bool
4111 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4112 tree *value)
4114 struct loop *loop = data->current_loop;
4115 edge exit;
4116 tree nit;
4118 exit = single_dom_exit (loop);
4119 if (!exit)
4120 return false;
4122 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4123 bb_for_stmt (use->stmt)));
4125 nit = niter_for_single_dom_exit (data);
4126 if (!nit)
4127 return false;
4129 *value = iv_value (use->iv, nit);
4131 return true;
4134 /* Determines cost of replacing final value of USE using CAND. */
4136 static bool
4137 determine_use_iv_cost_outer (struct ivopts_data *data,
4138 struct iv_use *use, struct iv_cand *cand)
4140 bitmap depends_on;
4141 unsigned cost;
4142 edge exit;
4143 tree value = NULL_TREE;
4144 struct loop *loop = data->current_loop;
4146 /* The simple case first -- if we need to express value of the preserved
4147 original biv, the cost is 0. This also prevents us from counting the
4148 cost of increment twice -- once at this use and once in the cost of
4149 the candidate. */
4150 if (cand->pos == IP_ORIGINAL
4151 && cand->incremented_at == use->stmt)
4153 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4154 return true;
4157 if (!cand->iv)
4159 if (!may_replace_final_value (data, use, &value))
4161 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4162 return false;
4165 depends_on = NULL;
4166 cost = force_var_cost (data, value, &depends_on);
4168 cost /= AVG_LOOP_NITER (loop);
4170 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4171 return cost != INFTY;
4174 exit = single_dom_exit (loop);
4175 if (exit)
4177 /* If there is just a single exit, we may use value of the candidate
4178 after we take it to determine the value of use. */
4179 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4180 last_stmt (exit->src));
4181 if (cost != INFTY)
4182 cost /= AVG_LOOP_NITER (loop);
4184 else
4186 /* Otherwise we just need to compute the iv. */
4187 cost = get_computation_cost (data, use, cand, false, &depends_on);
4190 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4192 return cost != INFTY;
4195 /* Determines cost of basing replacement of USE on CAND. Returns false
4196 if USE cannot be based on CAND. */
4198 static bool
4199 determine_use_iv_cost (struct ivopts_data *data,
4200 struct iv_use *use, struct iv_cand *cand)
4202 switch (use->type)
4204 case USE_NONLINEAR_EXPR:
4205 return determine_use_iv_cost_generic (data, use, cand);
4207 case USE_OUTER:
4208 return determine_use_iv_cost_outer (data, use, cand);
4210 case USE_ADDRESS:
4211 return determine_use_iv_cost_address (data, use, cand);
4213 case USE_COMPARE:
4214 return determine_use_iv_cost_condition (data, use, cand);
4216 default:
4217 gcc_unreachable ();
4221 /* Determines costs of basing the use of the iv on an iv candidate. */
4223 static void
4224 determine_use_iv_costs (struct ivopts_data *data)
4226 unsigned i, j;
4227 struct iv_use *use;
4228 struct iv_cand *cand;
4229 bitmap to_clear = BITMAP_ALLOC (NULL);
4231 alloc_use_cost_map (data);
4233 for (i = 0; i < n_iv_uses (data); i++)
4235 use = iv_use (data, i);
4237 if (data->consider_all_candidates)
4239 for (j = 0; j < n_iv_cands (data); j++)
4241 cand = iv_cand (data, j);
4242 determine_use_iv_cost (data, use, cand);
4245 else
4247 bitmap_iterator bi;
4249 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4251 cand = iv_cand (data, j);
4252 if (!determine_use_iv_cost (data, use, cand))
4253 bitmap_set_bit (to_clear, j);
4256 /* Remove the candidates for that the cost is infinite from
4257 the list of related candidates. */
4258 bitmap_and_compl_into (use->related_cands, to_clear);
4259 bitmap_clear (to_clear);
4263 BITMAP_FREE (to_clear);
4265 if (dump_file && (dump_flags & TDF_DETAILS))
4267 fprintf (dump_file, "Use-candidate costs:\n");
4269 for (i = 0; i < n_iv_uses (data); i++)
4271 use = iv_use (data, i);
4273 fprintf (dump_file, "Use %d:\n", i);
4274 fprintf (dump_file, " cand\tcost\tdepends on\n");
4275 for (j = 0; j < use->n_map_members; j++)
4277 if (!use->cost_map[j].cand
4278 || use->cost_map[j].cost == INFTY)
4279 continue;
4281 fprintf (dump_file, " %d\t%d\t",
4282 use->cost_map[j].cand->id,
4283 use->cost_map[j].cost);
4284 if (use->cost_map[j].depends_on)
4285 bitmap_print (dump_file,
4286 use->cost_map[j].depends_on, "","");
4287 fprintf (dump_file, "\n");
4290 fprintf (dump_file, "\n");
4292 fprintf (dump_file, "\n");
4296 /* Determines cost of the candidate CAND. */
4298 static void
4299 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4301 unsigned cost_base, cost_step;
4302 tree base;
4304 if (!cand->iv)
4306 cand->cost = 0;
4307 return;
4310 /* There are two costs associated with the candidate -- its increment
4311 and its initialization. The second is almost negligible for any loop
4312 that rolls enough, so we take it just very little into account. */
4314 base = cand->iv->base;
4315 cost_base = force_var_cost (data, base, NULL);
4316 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4318 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4320 /* Prefer the original iv unless we may gain something by replacing it;
4321 this is not really relevant for artificial ivs created by other
4322 passes. */
4323 if (cand->pos == IP_ORIGINAL
4324 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4325 cand->cost--;
4327 /* Prefer not to insert statements into latch unless there are some
4328 already (so that we do not create unnecessary jumps). */
4329 if (cand->pos == IP_END
4330 && empty_block_p (ip_end_pos (data->current_loop)))
4331 cand->cost++;
4334 /* Determines costs of computation of the candidates. */
4336 static void
4337 determine_iv_costs (struct ivopts_data *data)
4339 unsigned i;
4341 if (dump_file && (dump_flags & TDF_DETAILS))
4343 fprintf (dump_file, "Candidate costs:\n");
4344 fprintf (dump_file, " cand\tcost\n");
4347 for (i = 0; i < n_iv_cands (data); i++)
4349 struct iv_cand *cand = iv_cand (data, i);
4351 determine_iv_cost (data, cand);
4353 if (dump_file && (dump_flags & TDF_DETAILS))
4354 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4357 if (dump_file && (dump_flags & TDF_DETAILS))
4358 fprintf (dump_file, "\n");
4361 /* Calculates cost for having SIZE induction variables. */
4363 static unsigned
4364 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4366 return global_cost_for_size (size,
4367 loop_data (data->current_loop)->regs_used,
4368 n_iv_uses (data));
4371 /* For each size of the induction variable set determine the penalty. */
4373 static void
4374 determine_set_costs (struct ivopts_data *data)
4376 unsigned j, n;
4377 tree phi, op;
4378 struct loop *loop = data->current_loop;
4379 bitmap_iterator bi;
4381 /* We use the following model (definitely improvable, especially the
4382 cost function -- TODO):
4384 We estimate the number of registers available (using MD data), name it A.
4386 We estimate the number of registers used by the loop, name it U. This
4387 number is obtained as the number of loop phi nodes (not counting virtual
4388 registers and bivs) + the number of variables from outside of the loop.
4390 We set a reserve R (free regs that are used for temporary computations,
4391 etc.). For now the reserve is a constant 3.
4393 Let I be the number of induction variables.
4395 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4396 make a lot of ivs without a reason).
4397 -- if A - R < U + I <= A, the cost is I * PRES_COST
4398 -- if U + I > A, the cost is I * PRES_COST and
4399 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4401 if (dump_file && (dump_flags & TDF_DETAILS))
4403 fprintf (dump_file, "Global costs:\n");
4404 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4405 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4406 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4407 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4410 n = 0;
4411 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4413 op = PHI_RESULT (phi);
4415 if (!is_gimple_reg (op))
4416 continue;
4418 if (get_iv (data, op))
4419 continue;
4421 n++;
4424 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4426 struct version_info *info = ver_info (data, j);
4428 if (info->inv_id && info->has_nonlin_use)
4429 n++;
4432 loop_data (loop)->regs_used = n;
4433 if (dump_file && (dump_flags & TDF_DETAILS))
4434 fprintf (dump_file, " regs_used %d\n", n);
4436 if (dump_file && (dump_flags & TDF_DETAILS))
4438 fprintf (dump_file, " cost for size:\n");
4439 fprintf (dump_file, " ivs\tcost\n");
4440 for (j = 0; j <= 2 * target_avail_regs; j++)
4441 fprintf (dump_file, " %d\t%d\n", j,
4442 ivopts_global_cost_for_size (data, j));
4443 fprintf (dump_file, "\n");
4447 /* Returns true if A is a cheaper cost pair than B. */
4449 static bool
4450 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4452 if (!a)
4453 return false;
4455 if (!b)
4456 return true;
4458 if (a->cost < b->cost)
4459 return true;
4461 if (a->cost > b->cost)
4462 return false;
4464 /* In case the costs are the same, prefer the cheaper candidate. */
4465 if (a->cand->cost < b->cand->cost)
4466 return true;
4468 return false;
4471 /* Computes the cost field of IVS structure. */
4473 static void
4474 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4476 unsigned cost = 0;
4478 cost += ivs->cand_use_cost;
4479 cost += ivs->cand_cost;
4480 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4482 ivs->cost = cost;
4485 /* Remove invariants in set INVS to set IVS. */
4487 static void
4488 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4490 bitmap_iterator bi;
4491 unsigned iid;
4493 if (!invs)
4494 return;
4496 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4498 ivs->n_invariant_uses[iid]--;
4499 if (ivs->n_invariant_uses[iid] == 0)
4500 ivs->n_regs--;
4504 /* Set USE not to be expressed by any candidate in IVS. */
4506 static void
4507 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4508 struct iv_use *use)
4510 unsigned uid = use->id, cid;
4511 struct cost_pair *cp;
4513 cp = ivs->cand_for_use[uid];
4514 if (!cp)
4515 return;
4516 cid = cp->cand->id;
4518 ivs->bad_uses++;
4519 ivs->cand_for_use[uid] = NULL;
4520 ivs->n_cand_uses[cid]--;
4522 if (ivs->n_cand_uses[cid] == 0)
4524 bitmap_clear_bit (ivs->cands, cid);
4525 /* Do not count the pseudocandidates. */
4526 if (cp->cand->iv)
4527 ivs->n_regs--;
4528 ivs->n_cands--;
4529 ivs->cand_cost -= cp->cand->cost;
4531 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4534 ivs->cand_use_cost -= cp->cost;
4536 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4537 iv_ca_recount_cost (data, ivs);
4540 /* Add invariants in set INVS to set IVS. */
4542 static void
4543 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4545 bitmap_iterator bi;
4546 unsigned iid;
4548 if (!invs)
4549 return;
4551 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4553 ivs->n_invariant_uses[iid]++;
4554 if (ivs->n_invariant_uses[iid] == 1)
4555 ivs->n_regs++;
4559 /* Set cost pair for USE in set IVS to CP. */
4561 static void
4562 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4563 struct iv_use *use, struct cost_pair *cp)
4565 unsigned uid = use->id, cid;
4567 if (ivs->cand_for_use[uid] == cp)
4568 return;
4570 if (ivs->cand_for_use[uid])
4571 iv_ca_set_no_cp (data, ivs, use);
4573 if (cp)
4575 cid = cp->cand->id;
4577 ivs->bad_uses--;
4578 ivs->cand_for_use[uid] = cp;
4579 ivs->n_cand_uses[cid]++;
4580 if (ivs->n_cand_uses[cid] == 1)
4582 bitmap_set_bit (ivs->cands, cid);
4583 /* Do not count the pseudocandidates. */
4584 if (cp->cand->iv)
4585 ivs->n_regs++;
4586 ivs->n_cands++;
4587 ivs->cand_cost += cp->cand->cost;
4589 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4592 ivs->cand_use_cost += cp->cost;
4593 iv_ca_set_add_invariants (ivs, cp->depends_on);
4594 iv_ca_recount_cost (data, ivs);
4598 /* Extend set IVS by expressing USE by some of the candidates in it
4599 if possible. */
4601 static void
4602 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4603 struct iv_use *use)
4605 struct cost_pair *best_cp = NULL, *cp;
4606 bitmap_iterator bi;
4607 unsigned i;
4609 gcc_assert (ivs->upto >= use->id);
4611 if (ivs->upto == use->id)
4613 ivs->upto++;
4614 ivs->bad_uses++;
4617 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4619 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4621 if (cheaper_cost_pair (cp, best_cp))
4622 best_cp = cp;
4625 iv_ca_set_cp (data, ivs, use, best_cp);
4628 /* Get cost for assignment IVS. */
4630 static unsigned
4631 iv_ca_cost (struct iv_ca *ivs)
4633 return (ivs->bad_uses ? INFTY : ivs->cost);
4636 /* Returns true if all dependences of CP are among invariants in IVS. */
4638 static bool
4639 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4641 unsigned i;
4642 bitmap_iterator bi;
4644 if (!cp->depends_on)
4645 return true;
4647 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4649 if (ivs->n_invariant_uses[i] == 0)
4650 return false;
4653 return true;
4656 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4657 it before NEXT_CHANGE. */
4659 static struct iv_ca_delta *
4660 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4661 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4663 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4665 change->use = use;
4666 change->old_cp = old_cp;
4667 change->new_cp = new_cp;
4668 change->next_change = next_change;
4670 return change;
4673 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4674 are rewritten. */
4676 static struct iv_ca_delta *
4677 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4679 struct iv_ca_delta *last;
4681 if (!l2)
4682 return l1;
4684 if (!l1)
4685 return l2;
4687 for (last = l1; last->next_change; last = last->next_change)
4688 continue;
4689 last->next_change = l2;
4691 return l1;
4694 /* Returns candidate by that USE is expressed in IVS. */
4696 static struct cost_pair *
4697 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4699 return ivs->cand_for_use[use->id];
4702 /* Reverse the list of changes DELTA, forming the inverse to it. */
4704 static struct iv_ca_delta *
4705 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4707 struct iv_ca_delta *act, *next, *prev = NULL;
4708 struct cost_pair *tmp;
4710 for (act = delta; act; act = next)
4712 next = act->next_change;
4713 act->next_change = prev;
4714 prev = act;
4716 tmp = act->old_cp;
4717 act->old_cp = act->new_cp;
4718 act->new_cp = tmp;
4721 return prev;
4724 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4725 reverted instead. */
4727 static void
4728 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4729 struct iv_ca_delta *delta, bool forward)
4731 struct cost_pair *from, *to;
4732 struct iv_ca_delta *act;
4734 if (!forward)
4735 delta = iv_ca_delta_reverse (delta);
4737 for (act = delta; act; act = act->next_change)
4739 from = act->old_cp;
4740 to = act->new_cp;
4741 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4742 iv_ca_set_cp (data, ivs, act->use, to);
4745 if (!forward)
4746 iv_ca_delta_reverse (delta);
4749 /* Returns true if CAND is used in IVS. */
4751 static bool
4752 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4754 return ivs->n_cand_uses[cand->id] > 0;
4757 /* Returns number of induction variable candidates in the set IVS. */
4759 static unsigned
4760 iv_ca_n_cands (struct iv_ca *ivs)
4762 return ivs->n_cands;
4765 /* Free the list of changes DELTA. */
4767 static void
4768 iv_ca_delta_free (struct iv_ca_delta **delta)
4770 struct iv_ca_delta *act, *next;
4772 for (act = *delta; act; act = next)
4774 next = act->next_change;
4775 free (act);
4778 *delta = NULL;
4781 /* Allocates new iv candidates assignment. */
4783 static struct iv_ca *
4784 iv_ca_new (struct ivopts_data *data)
4786 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4788 nw->upto = 0;
4789 nw->bad_uses = 0;
4790 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4791 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4792 nw->cands = BITMAP_ALLOC (NULL);
4793 nw->n_cands = 0;
4794 nw->n_regs = 0;
4795 nw->cand_use_cost = 0;
4796 nw->cand_cost = 0;
4797 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4798 nw->cost = 0;
4800 return nw;
4803 /* Free memory occupied by the set IVS. */
4805 static void
4806 iv_ca_free (struct iv_ca **ivs)
4808 free ((*ivs)->cand_for_use);
4809 free ((*ivs)->n_cand_uses);
4810 BITMAP_FREE ((*ivs)->cands);
4811 free ((*ivs)->n_invariant_uses);
4812 free (*ivs);
4813 *ivs = NULL;
4816 /* Dumps IVS to FILE. */
4818 static void
4819 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4821 const char *pref = " invariants ";
4822 unsigned i;
4824 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4825 bitmap_print (file, ivs->cands, " candidates ","\n");
4827 for (i = 1; i <= data->max_inv_id; i++)
4828 if (ivs->n_invariant_uses[i])
4830 fprintf (file, "%s%d", pref, i);
4831 pref = ", ";
4833 fprintf (file, "\n");
4836 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4837 new set, and store differences in DELTA. Number of induction variables
4838 in the new set is stored to N_IVS. */
4840 static unsigned
4841 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4842 struct iv_cand *cand, struct iv_ca_delta **delta,
4843 unsigned *n_ivs)
4845 unsigned i, cost;
4846 struct iv_use *use;
4847 struct cost_pair *old_cp, *new_cp;
4849 *delta = NULL;
4850 for (i = 0; i < ivs->upto; i++)
4852 use = iv_use (data, i);
4853 old_cp = iv_ca_cand_for_use (ivs, use);
4855 if (old_cp
4856 && old_cp->cand == cand)
4857 continue;
4859 new_cp = get_use_iv_cost (data, use, cand);
4860 if (!new_cp)
4861 continue;
4863 if (!iv_ca_has_deps (ivs, new_cp))
4864 continue;
4866 if (!cheaper_cost_pair (new_cp, old_cp))
4867 continue;
4869 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4872 iv_ca_delta_commit (data, ivs, *delta, true);
4873 cost = iv_ca_cost (ivs);
4874 if (n_ivs)
4875 *n_ivs = iv_ca_n_cands (ivs);
4876 iv_ca_delta_commit (data, ivs, *delta, false);
4878 return cost;
4881 /* Try narrowing set IVS by removing CAND. Return the cost of
4882 the new set and store the differences in DELTA. */
4884 static unsigned
4885 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4886 struct iv_cand *cand, struct iv_ca_delta **delta)
4888 unsigned i, ci;
4889 struct iv_use *use;
4890 struct cost_pair *old_cp, *new_cp, *cp;
4891 bitmap_iterator bi;
4892 struct iv_cand *cnd;
4893 unsigned cost;
4895 *delta = NULL;
4896 for (i = 0; i < n_iv_uses (data); i++)
4898 use = iv_use (data, i);
4900 old_cp = iv_ca_cand_for_use (ivs, use);
4901 if (old_cp->cand != cand)
4902 continue;
4904 new_cp = NULL;
4906 if (data->consider_all_candidates)
4908 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4910 if (ci == cand->id)
4911 continue;
4913 cnd = iv_cand (data, ci);
4915 cp = get_use_iv_cost (data, use, cnd);
4916 if (!cp)
4917 continue;
4918 if (!iv_ca_has_deps (ivs, cp))
4919 continue;
4921 if (!cheaper_cost_pair (cp, new_cp))
4922 continue;
4924 new_cp = cp;
4927 else
4929 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4931 if (ci == cand->id)
4932 continue;
4934 cnd = iv_cand (data, ci);
4936 cp = get_use_iv_cost (data, use, cnd);
4937 if (!cp)
4938 continue;
4939 if (!iv_ca_has_deps (ivs, cp))
4940 continue;
4942 if (!cheaper_cost_pair (cp, new_cp))
4943 continue;
4945 new_cp = cp;
4949 if (!new_cp)
4951 iv_ca_delta_free (delta);
4952 return INFTY;
4955 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4958 iv_ca_delta_commit (data, ivs, *delta, true);
4959 cost = iv_ca_cost (ivs);
4960 iv_ca_delta_commit (data, ivs, *delta, false);
4962 return cost;
4965 /* Try optimizing the set of candidates IVS by removing candidates different
4966 from to EXCEPT_CAND from it. Return cost of the new set, and store
4967 differences in DELTA. */
4969 static unsigned
4970 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4971 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4973 bitmap_iterator bi;
4974 struct iv_ca_delta *act_delta, *best_delta;
4975 unsigned i, best_cost, acost;
4976 struct iv_cand *cand;
4978 best_delta = NULL;
4979 best_cost = iv_ca_cost (ivs);
4981 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4983 cand = iv_cand (data, i);
4985 if (cand == except_cand)
4986 continue;
4988 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4990 if (acost < best_cost)
4992 best_cost = acost;
4993 iv_ca_delta_free (&best_delta);
4994 best_delta = act_delta;
4996 else
4997 iv_ca_delta_free (&act_delta);
5000 if (!best_delta)
5002 *delta = NULL;
5003 return best_cost;
5006 /* Recurse to possibly remove other unnecessary ivs. */
5007 iv_ca_delta_commit (data, ivs, best_delta, true);
5008 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5009 iv_ca_delta_commit (data, ivs, best_delta, false);
5010 *delta = iv_ca_delta_join (best_delta, *delta);
5011 return best_cost;
5014 /* Tries to extend the sets IVS in the best possible way in order
5015 to express the USE. */
5017 static bool
5018 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5019 struct iv_use *use)
5021 unsigned best_cost, act_cost;
5022 unsigned i;
5023 bitmap_iterator bi;
5024 struct iv_cand *cand;
5025 struct iv_ca_delta *best_delta = NULL, *act_delta;
5026 struct cost_pair *cp;
5028 iv_ca_add_use (data, ivs, use);
5029 best_cost = iv_ca_cost (ivs);
5031 cp = iv_ca_cand_for_use (ivs, use);
5032 if (cp)
5034 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5035 iv_ca_set_no_cp (data, ivs, use);
5038 /* First try important candidates. Only if it fails, try the specific ones.
5039 Rationale -- in loops with many variables the best choice often is to use
5040 just one generic biv. If we added here many ivs specific to the uses,
5041 the optimization algorithm later would be likely to get stuck in a local
5042 minimum, thus causing us to create too many ivs. The approach from
5043 few ivs to more seems more likely to be successful -- starting from few
5044 ivs, replacing an expensive use by a specific iv should always be a
5045 win. */
5046 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5048 cand = iv_cand (data, i);
5050 if (iv_ca_cand_used_p (ivs, cand))
5051 continue;
5053 cp = get_use_iv_cost (data, use, cand);
5054 if (!cp)
5055 continue;
5057 iv_ca_set_cp (data, ivs, use, cp);
5058 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5059 iv_ca_set_no_cp (data, ivs, use);
5060 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5062 if (act_cost < best_cost)
5064 best_cost = act_cost;
5066 iv_ca_delta_free (&best_delta);
5067 best_delta = act_delta;
5069 else
5070 iv_ca_delta_free (&act_delta);
5073 if (best_cost == INFTY)
5075 for (i = 0; i < use->n_map_members; i++)
5077 cp = use->cost_map + i;
5078 cand = cp->cand;
5079 if (!cand)
5080 continue;
5082 /* Already tried this. */
5083 if (cand->important)
5084 continue;
5086 if (iv_ca_cand_used_p (ivs, cand))
5087 continue;
5089 act_delta = NULL;
5090 iv_ca_set_cp (data, ivs, use, cp);
5091 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5092 iv_ca_set_no_cp (data, ivs, use);
5093 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5094 cp, act_delta);
5096 if (act_cost < best_cost)
5098 best_cost = act_cost;
5100 if (best_delta)
5101 iv_ca_delta_free (&best_delta);
5102 best_delta = act_delta;
5104 else
5105 iv_ca_delta_free (&act_delta);
5109 iv_ca_delta_commit (data, ivs, best_delta, true);
5110 iv_ca_delta_free (&best_delta);
5112 return (best_cost != INFTY);
5115 /* Finds an initial assignment of candidates to uses. */
5117 static struct iv_ca *
5118 get_initial_solution (struct ivopts_data *data)
5120 struct iv_ca *ivs = iv_ca_new (data);
5121 unsigned i;
5123 for (i = 0; i < n_iv_uses (data); i++)
5124 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5126 iv_ca_free (&ivs);
5127 return NULL;
5130 return ivs;
5133 /* Tries to improve set of induction variables IVS. */
5135 static bool
5136 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5138 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5139 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5140 struct iv_cand *cand;
5142 /* Try extending the set of induction variables by one. */
5143 for (i = 0; i < n_iv_cands (data); i++)
5145 cand = iv_cand (data, i);
5147 if (iv_ca_cand_used_p (ivs, cand))
5148 continue;
5150 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5151 if (!act_delta)
5152 continue;
5154 /* If we successfully added the candidate and the set is small enough,
5155 try optimizing it by removing other candidates. */
5156 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5158 iv_ca_delta_commit (data, ivs, act_delta, true);
5159 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5160 iv_ca_delta_commit (data, ivs, act_delta, false);
5161 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5164 if (acost < best_cost)
5166 best_cost = acost;
5167 iv_ca_delta_free (&best_delta);
5168 best_delta = act_delta;
5170 else
5171 iv_ca_delta_free (&act_delta);
5174 if (!best_delta)
5176 /* Try removing the candidates from the set instead. */
5177 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5179 /* Nothing more we can do. */
5180 if (!best_delta)
5181 return false;
5184 iv_ca_delta_commit (data, ivs, best_delta, true);
5185 gcc_assert (best_cost == iv_ca_cost (ivs));
5186 iv_ca_delta_free (&best_delta);
5187 return true;
5190 /* Attempts to find the optimal set of induction variables. We do simple
5191 greedy heuristic -- we try to replace at most one candidate in the selected
5192 solution and remove the unused ivs while this improves the cost. */
5194 static struct iv_ca *
5195 find_optimal_iv_set (struct ivopts_data *data)
5197 unsigned i;
5198 struct iv_ca *set;
5199 struct iv_use *use;
5201 /* Get the initial solution. */
5202 set = get_initial_solution (data);
5203 if (!set)
5205 if (dump_file && (dump_flags & TDF_DETAILS))
5206 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5207 return NULL;
5210 if (dump_file && (dump_flags & TDF_DETAILS))
5212 fprintf (dump_file, "Initial set of candidates:\n");
5213 iv_ca_dump (data, dump_file, set);
5216 while (try_improve_iv_set (data, set))
5218 if (dump_file && (dump_flags & TDF_DETAILS))
5220 fprintf (dump_file, "Improved to:\n");
5221 iv_ca_dump (data, dump_file, set);
5225 if (dump_file && (dump_flags & TDF_DETAILS))
5226 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5228 for (i = 0; i < n_iv_uses (data); i++)
5230 use = iv_use (data, i);
5231 use->selected = iv_ca_cand_for_use (set, use)->cand;
5234 return set;
5237 /* Creates a new induction variable corresponding to CAND. */
5239 static void
5240 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5242 block_stmt_iterator incr_pos;
5243 tree base;
5244 bool after = false;
5246 if (!cand->iv)
5247 return;
5249 switch (cand->pos)
5251 case IP_NORMAL:
5252 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5253 break;
5255 case IP_END:
5256 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5257 after = true;
5258 break;
5260 case IP_ORIGINAL:
5261 /* Mark that the iv is preserved. */
5262 name_info (data, cand->var_before)->preserve_biv = true;
5263 name_info (data, cand->var_after)->preserve_biv = true;
5265 /* Rewrite the increment so that it uses var_before directly. */
5266 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5268 return;
5271 gimple_add_tmp_var (cand->var_before);
5272 add_referenced_tmp_var (cand->var_before);
5274 base = unshare_expr (cand->iv->base);
5276 create_iv (base, unshare_expr (cand->iv->step),
5277 cand->var_before, data->current_loop,
5278 &incr_pos, after, &cand->var_before, &cand->var_after);
5281 /* Creates new induction variables described in SET. */
5283 static void
5284 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5286 unsigned i;
5287 struct iv_cand *cand;
5288 bitmap_iterator bi;
5290 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5292 cand = iv_cand (data, i);
5293 create_new_iv (data, cand);
5297 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5298 is true, remove also the ssa name defined by the statement. */
5300 static void
5301 remove_statement (tree stmt, bool including_defined_name)
5303 if (TREE_CODE (stmt) == PHI_NODE)
5305 if (!including_defined_name)
5307 /* Prevent the ssa name defined by the statement from being removed. */
5308 SET_PHI_RESULT (stmt, NULL);
5310 remove_phi_node (stmt, NULL_TREE);
5312 else
5314 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5316 bsi_remove (&bsi);
5320 /* Rewrites USE (definition of iv used in a nonlinear expression)
5321 using candidate CAND. */
5323 static void
5324 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5325 struct iv_use *use, struct iv_cand *cand)
5327 tree comp;
5328 tree op, stmts, tgt, ass;
5329 block_stmt_iterator bsi, pbsi;
5331 /* An important special case -- if we are asked to express value of
5332 the original iv by itself, just exit; there is no need to
5333 introduce a new computation (that might also need casting the
5334 variable to unsigned and back). */
5335 if (cand->pos == IP_ORIGINAL
5336 && cand->incremented_at == use->stmt)
5338 tree step, ctype, utype;
5339 enum tree_code incr_code = PLUS_EXPR;
5341 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5342 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5344 step = cand->iv->step;
5345 ctype = TREE_TYPE (step);
5346 utype = TREE_TYPE (cand->var_after);
5347 if (TREE_CODE (step) == NEGATE_EXPR)
5349 incr_code = MINUS_EXPR;
5350 step = TREE_OPERAND (step, 0);
5353 /* Check whether we may leave the computation unchanged.
5354 This is the case only if it does not rely on other
5355 computations in the loop -- otherwise, the computation
5356 we rely upon may be removed in remove_unused_ivs,
5357 thus leading to ICE. */
5358 op = TREE_OPERAND (use->stmt, 1);
5359 if (TREE_CODE (op) == PLUS_EXPR
5360 || TREE_CODE (op) == MINUS_EXPR)
5362 if (TREE_OPERAND (op, 0) == cand->var_before)
5363 op = TREE_OPERAND (op, 1);
5364 else if (TREE_CODE (op) == PLUS_EXPR
5365 && TREE_OPERAND (op, 1) == cand->var_before)
5366 op = TREE_OPERAND (op, 0);
5367 else
5368 op = NULL_TREE;
5370 else
5371 op = NULL_TREE;
5373 if (op
5374 && (TREE_CODE (op) == INTEGER_CST
5375 || operand_equal_p (op, step, 0)))
5376 return;
5378 /* Otherwise, add the necessary computations to express
5379 the iv. */
5380 op = fold_convert (ctype, cand->var_before);
5381 comp = fold_convert (utype,
5382 build2 (incr_code, ctype, op,
5383 unshare_expr (step)));
5385 else
5386 comp = get_computation (data->current_loop, use, cand);
5388 switch (TREE_CODE (use->stmt))
5390 case PHI_NODE:
5391 tgt = PHI_RESULT (use->stmt);
5393 /* If we should keep the biv, do not replace it. */
5394 if (name_info (data, tgt)->preserve_biv)
5395 return;
5397 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5398 while (!bsi_end_p (pbsi)
5399 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5401 bsi = pbsi;
5402 bsi_next (&pbsi);
5404 break;
5406 case MODIFY_EXPR:
5407 tgt = TREE_OPERAND (use->stmt, 0);
5408 bsi = bsi_for_stmt (use->stmt);
5409 break;
5411 default:
5412 gcc_unreachable ();
5415 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5417 if (TREE_CODE (use->stmt) == PHI_NODE)
5419 if (stmts)
5420 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5421 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5422 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5423 remove_statement (use->stmt, false);
5424 SSA_NAME_DEF_STMT (tgt) = ass;
5426 else
5428 if (stmts)
5429 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5430 TREE_OPERAND (use->stmt, 1) = op;
5434 /* Replaces ssa name in index IDX by its basic variable. Callback for
5435 for_each_index. */
5437 static bool
5438 idx_remove_ssa_names (tree base, tree *idx,
5439 void *data ATTRIBUTE_UNUSED)
5441 tree *op;
5443 if (TREE_CODE (*idx) == SSA_NAME)
5444 *idx = SSA_NAME_VAR (*idx);
5446 if (TREE_CODE (base) == ARRAY_REF)
5448 op = &TREE_OPERAND (base, 2);
5449 if (*op
5450 && TREE_CODE (*op) == SSA_NAME)
5451 *op = SSA_NAME_VAR (*op);
5452 op = &TREE_OPERAND (base, 3);
5453 if (*op
5454 && TREE_CODE (*op) == SSA_NAME)
5455 *op = SSA_NAME_VAR (*op);
5458 return true;
5461 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5463 static tree
5464 unshare_and_remove_ssa_names (tree ref)
5466 ref = unshare_expr (ref);
5467 for_each_index (&ref, idx_remove_ssa_names, NULL);
5469 return ref;
5472 /* Extract the alias analysis info for the memory reference REF. There are
5473 several ways how this information may be stored and what precisely is
5474 its semantics depending on the type of the reference, but there always is
5475 somewhere hidden one _DECL node that is used to determine the set of
5476 virtual operands for the reference. The code below deciphers this jungle
5477 and extracts this single useful piece of information. */
5479 static tree
5480 get_ref_tag (tree ref, tree orig)
5482 tree var = get_base_address (ref);
5483 tree tag, sv;
5484 unsigned HOST_WIDE_INT offset, size;
5486 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5487 if (okay_component_ref_for_subvars (sv, &offset, &size))
5488 break;
5490 if (handled_component_p (sv))
5491 return unshare_expr (sv);
5493 if (!var)
5494 return NULL_TREE;
5496 if (TREE_CODE (var) == INDIRECT_REF)
5498 /* In case the base is a dereference of a pointer, first check its name
5499 mem tag, and if it does not have one, use type mem tag. */
5500 var = TREE_OPERAND (var, 0);
5501 if (TREE_CODE (var) != SSA_NAME)
5502 return NULL_TREE;
5504 if (SSA_NAME_PTR_INFO (var))
5506 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5507 if (tag)
5508 return tag;
5511 var = SSA_NAME_VAR (var);
5512 tag = var_ann (var)->type_mem_tag;
5513 gcc_assert (tag != NULL_TREE);
5514 return tag;
5516 else
5518 if (!DECL_P (var))
5519 return NULL_TREE;
5521 tag = var_ann (var)->type_mem_tag;
5522 if (tag)
5523 return tag;
5525 return var;
5529 /* Copies the reference information from OLD_REF to NEW_REF. */
5531 static void
5532 copy_ref_info (tree new_ref, tree old_ref)
5534 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5535 copy_mem_ref_info (new_ref, old_ref);
5536 else
5538 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5539 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5543 /* Rewrites USE (address that is an iv) using candidate CAND. */
5545 static void
5546 rewrite_use_address (struct ivopts_data *data,
5547 struct iv_use *use, struct iv_cand *cand)
5549 struct affine_tree_combination aff;
5550 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5551 tree ref;
5553 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5554 unshare_aff_combination (&aff);
5556 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5557 copy_ref_info (ref, *use->op_p);
5558 *use->op_p = ref;
5561 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5562 candidate CAND. */
5564 static void
5565 rewrite_use_compare (struct ivopts_data *data,
5566 struct iv_use *use, struct iv_cand *cand)
5568 tree comp;
5569 tree *op_p, cond, op, stmts, bound;
5570 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5571 enum tree_code compare;
5572 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5574 bound = cp->value;
5575 if (bound)
5577 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5578 tree var_type = TREE_TYPE (var);
5580 compare = iv_elimination_compare (data, use);
5581 bound = fold_convert (var_type, bound);
5582 op = force_gimple_operand (unshare_expr (bound), &stmts,
5583 true, NULL_TREE);
5585 if (stmts)
5586 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5588 *use->op_p = build2 (compare, boolean_type_node, var, op);
5589 update_stmt (use->stmt);
5590 return;
5593 /* The induction variable elimination failed; just express the original
5594 giv. */
5595 comp = get_computation (data->current_loop, use, cand);
5597 cond = *use->op_p;
5598 op_p = &TREE_OPERAND (cond, 0);
5599 if (TREE_CODE (*op_p) != SSA_NAME
5600 || zero_p (get_iv (data, *op_p)->step))
5601 op_p = &TREE_OPERAND (cond, 1);
5603 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5604 if (stmts)
5605 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5607 *op_p = op;
5610 /* Ensure that operand *OP_P may be used at the end of EXIT without
5611 violating loop closed ssa form. */
5613 static void
5614 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5616 basic_block def_bb;
5617 struct loop *def_loop;
5618 tree phi, use;
5620 use = USE_FROM_PTR (op_p);
5621 if (TREE_CODE (use) != SSA_NAME)
5622 return;
5624 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5625 if (!def_bb)
5626 return;
5628 def_loop = def_bb->loop_father;
5629 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5630 return;
5632 /* Try finding a phi node that copies the value out of the loop. */
5633 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5634 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5635 break;
5637 if (!phi)
5639 /* Create such a phi node. */
5640 tree new_name = duplicate_ssa_name (use, NULL);
5642 phi = create_phi_node (new_name, exit->dest);
5643 SSA_NAME_DEF_STMT (new_name) = phi;
5644 add_phi_arg (phi, use, exit);
5647 SET_USE (op_p, PHI_RESULT (phi));
5650 /* Ensure that operands of STMT may be used at the end of EXIT without
5651 violating loop closed ssa form. */
5653 static void
5654 protect_loop_closed_ssa_form (edge exit, tree stmt)
5656 ssa_op_iter iter;
5657 use_operand_p use_p;
5659 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5660 protect_loop_closed_ssa_form_use (exit, use_p);
5663 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5664 so that they are emitted on the correct place, and so that the loop closed
5665 ssa form is preserved. */
5667 void
5668 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5670 tree_stmt_iterator tsi;
5671 block_stmt_iterator bsi;
5672 tree phi, stmt, def, next;
5674 if (!single_pred_p (exit->dest))
5675 split_loop_exit_edge (exit);
5677 /* Ensure there is label in exit->dest, so that we can
5678 insert after it. */
5679 tree_block_label (exit->dest);
5680 bsi = bsi_after_labels (exit->dest);
5682 if (TREE_CODE (stmts) == STATEMENT_LIST)
5684 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5686 tree stmt = tsi_stmt (tsi);
5687 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5688 protect_loop_closed_ssa_form (exit, stmt);
5691 else
5693 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5694 protect_loop_closed_ssa_form (exit, stmts);
5697 if (!op)
5698 return;
5700 for (phi = phi_nodes (exit->dest); phi; phi = next)
5702 next = PHI_CHAIN (phi);
5704 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5706 def = PHI_RESULT (phi);
5707 remove_statement (phi, false);
5708 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5709 def, op);
5710 SSA_NAME_DEF_STMT (def) = stmt;
5711 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5716 /* Rewrites the final value of USE (that is only needed outside of the loop)
5717 using candidate CAND. */
5719 static void
5720 rewrite_use_outer (struct ivopts_data *data,
5721 struct iv_use *use, struct iv_cand *cand)
5723 edge exit;
5724 tree value, op, stmts, tgt;
5725 tree phi;
5727 switch (TREE_CODE (use->stmt))
5729 case PHI_NODE:
5730 tgt = PHI_RESULT (use->stmt);
5731 break;
5732 case MODIFY_EXPR:
5733 tgt = TREE_OPERAND (use->stmt, 0);
5734 break;
5735 default:
5736 gcc_unreachable ();
5739 exit = single_dom_exit (data->current_loop);
5741 if (exit && !(exit->flags & EDGE_COMPLEX))
5743 if (!cand->iv)
5745 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5746 value = unshare_expr (cp->value);
5748 else
5749 value = get_computation_at (data->current_loop,
5750 use, cand, last_stmt (exit->src));
5752 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5754 /* If we will preserve the iv anyway and we would need to perform
5755 some computation to replace the final value, do nothing. */
5756 if (stmts && name_info (data, tgt)->preserve_biv)
5757 return;
5759 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5761 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5763 if (USE_FROM_PTR (use_p) == tgt)
5764 SET_USE (use_p, op);
5767 if (stmts)
5768 compute_phi_arg_on_exit (exit, stmts, op);
5770 /* Enable removal of the statement. We cannot remove it directly,
5771 since we may still need the aliasing information attached to the
5772 ssa name defined by it. */
5773 name_info (data, tgt)->iv->have_use_for = false;
5774 return;
5777 /* If the variable is going to be preserved anyway, there is nothing to
5778 do. */
5779 if (name_info (data, tgt)->preserve_biv)
5780 return;
5782 /* Otherwise we just need to compute the iv. */
5783 rewrite_use_nonlinear_expr (data, use, cand);
5786 /* Rewrites USE using candidate CAND. */
5788 static void
5789 rewrite_use (struct ivopts_data *data,
5790 struct iv_use *use, struct iv_cand *cand)
5792 switch (use->type)
5794 case USE_NONLINEAR_EXPR:
5795 rewrite_use_nonlinear_expr (data, use, cand);
5796 break;
5798 case USE_OUTER:
5799 rewrite_use_outer (data, use, cand);
5800 break;
5802 case USE_ADDRESS:
5803 rewrite_use_address (data, use, cand);
5804 break;
5806 case USE_COMPARE:
5807 rewrite_use_compare (data, use, cand);
5808 break;
5810 default:
5811 gcc_unreachable ();
5813 update_stmt (use->stmt);
5816 /* Rewrite the uses using the selected induction variables. */
5818 static void
5819 rewrite_uses (struct ivopts_data *data)
5821 unsigned i;
5822 struct iv_cand *cand;
5823 struct iv_use *use;
5825 for (i = 0; i < n_iv_uses (data); i++)
5827 use = iv_use (data, i);
5828 cand = use->selected;
5829 gcc_assert (cand);
5831 rewrite_use (data, use, cand);
5835 /* Removes the ivs that are not used after rewriting. */
5837 static void
5838 remove_unused_ivs (struct ivopts_data *data)
5840 unsigned j;
5841 bitmap_iterator bi;
5843 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5845 struct version_info *info;
5847 info = ver_info (data, j);
5848 if (info->iv
5849 && !zero_p (info->iv->step)
5850 && !info->inv_id
5851 && !info->iv->have_use_for
5852 && !info->preserve_biv)
5853 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5857 /* Frees data allocated by the optimization of a single loop. */
5859 static void
5860 free_loop_data (struct ivopts_data *data)
5862 unsigned i, j;
5863 bitmap_iterator bi;
5864 tree obj;
5866 htab_empty (data->niters);
5868 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5870 struct version_info *info;
5872 info = ver_info (data, i);
5873 if (info->iv)
5874 free (info->iv);
5875 info->iv = NULL;
5876 info->has_nonlin_use = false;
5877 info->preserve_biv = false;
5878 info->inv_id = 0;
5880 bitmap_clear (data->relevant);
5881 bitmap_clear (data->important_candidates);
5883 for (i = 0; i < n_iv_uses (data); i++)
5885 struct iv_use *use = iv_use (data, i);
5887 free (use->iv);
5888 BITMAP_FREE (use->related_cands);
5889 for (j = 0; j < use->n_map_members; j++)
5890 if (use->cost_map[j].depends_on)
5891 BITMAP_FREE (use->cost_map[j].depends_on);
5892 free (use->cost_map);
5893 free (use);
5895 VEC_truncate (iv_use_p, data->iv_uses, 0);
5897 for (i = 0; i < n_iv_cands (data); i++)
5899 struct iv_cand *cand = iv_cand (data, i);
5901 if (cand->iv)
5902 free (cand->iv);
5903 if (cand->depends_on)
5904 BITMAP_FREE (cand->depends_on);
5905 free (cand);
5907 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5909 if (data->version_info_size < num_ssa_names)
5911 data->version_info_size = 2 * num_ssa_names;
5912 free (data->version_info);
5913 data->version_info = xcalloc (data->version_info_size,
5914 sizeof (struct version_info));
5917 data->max_inv_id = 0;
5919 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5920 SET_DECL_RTL (obj, NULL_RTX);
5922 VEC_truncate (tree, decl_rtl_to_reset, 0);
5925 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5926 loop tree. */
5928 static void
5929 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5931 unsigned i;
5933 for (i = 1; i < loops->num; i++)
5934 if (loops->parray[i])
5936 free (loops->parray[i]->aux);
5937 loops->parray[i]->aux = NULL;
5940 free_loop_data (data);
5941 free (data->version_info);
5942 BITMAP_FREE (data->relevant);
5943 BITMAP_FREE (data->important_candidates);
5944 htab_delete (data->niters);
5946 VEC_free (tree, heap, decl_rtl_to_reset);
5947 VEC_free (iv_use_p, heap, data->iv_uses);
5948 VEC_free (iv_cand_p, heap, data->iv_candidates);
5951 /* Optimizes the LOOP. Returns true if anything changed. */
5953 static bool
5954 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5956 bool changed = false;
5957 struct iv_ca *iv_ca;
5958 edge exit;
5960 data->current_loop = loop;
5962 if (dump_file && (dump_flags & TDF_DETAILS))
5964 fprintf (dump_file, "Processing loop %d\n", loop->num);
5966 exit = single_dom_exit (loop);
5967 if (exit)
5969 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5970 exit->src->index, exit->dest->index);
5971 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5972 fprintf (dump_file, "\n");
5975 fprintf (dump_file, "\n");
5978 /* For each ssa name determines whether it behaves as an induction variable
5979 in some loop. */
5980 if (!find_induction_variables (data))
5981 goto finish;
5983 /* Finds interesting uses (item 1). */
5984 find_interesting_uses (data);
5985 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5986 goto finish;
5988 /* Finds candidates for the induction variables (item 2). */
5989 find_iv_candidates (data);
5991 /* Calculates the costs (item 3, part 1). */
5992 determine_use_iv_costs (data);
5993 determine_iv_costs (data);
5994 determine_set_costs (data);
5996 /* Find the optimal set of induction variables (item 3, part 2). */
5997 iv_ca = find_optimal_iv_set (data);
5998 if (!iv_ca)
5999 goto finish;
6000 changed = true;
6002 /* Create the new induction variables (item 4, part 1). */
6003 create_new_ivs (data, iv_ca);
6004 iv_ca_free (&iv_ca);
6006 /* Rewrite the uses (item 4, part 2). */
6007 rewrite_uses (data);
6009 /* Remove the ivs that are unused after rewriting. */
6010 remove_unused_ivs (data);
6012 /* We have changed the structure of induction variables; it might happen
6013 that definitions in the scev database refer to some of them that were
6014 eliminated. */
6015 scev_reset ();
6017 finish:
6018 free_loop_data (data);
6020 return changed;
6023 /* Main entry point. Optimizes induction variables in LOOPS. */
6025 void
6026 tree_ssa_iv_optimize (struct loops *loops)
6028 struct loop *loop;
6029 struct ivopts_data data;
6031 tree_ssa_iv_optimize_init (loops, &data);
6033 /* Optimize the loops starting with the innermost ones. */
6034 loop = loops->tree_root;
6035 while (loop->inner)
6036 loop = loop->inner;
6038 /* Scan the loops, inner ones first. */
6039 while (loop != loops->tree_root)
6041 if (dump_file && (dump_flags & TDF_DETAILS))
6042 flow_loop_dump (loop, dump_file, NULL, 1);
6044 tree_ssa_iv_optimize_loop (&data, loop);
6046 if (loop->next)
6048 loop = loop->next;
6049 while (loop->inner)
6050 loop = loop->inner;
6052 else
6053 loop = loop->outer;
6056 tree_ssa_iv_optimize_finalize (loops, &data);