/* Interprocedural constant propagation
   Copyright (C) 2005-2022 Free Software Foundation, Inc.

   Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
   <mjambor@suse.cz>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Interprocedural constant propagation (IPA-CP).

   The goal of this transformation is to

   1) discover functions which are always invoked with some arguments with the
      same known constant values and modify the functions so that the
      subsequent optimizations can take advantage of the knowledge, and

   2) partial specialization - create specialized versions of functions
      transformed in this way if some parameters are known constants only in
      certain contexts but the estimated tradeoff between speedup and code
      size cost is deemed good.

   The algorithm also propagates types and attempts to perform type based
   devirtualization.  Types are propagated much like constants.

   The algorithm basically consists of three stages.  In the first, functions
   are analyzed one at a time and jump functions are constructed for all known
   call-sites.  In the second phase, the pass propagates information from the
   jump functions across the calls to reveal what values are available at what
   call sites, performs estimations of effects of known values on functions
   and their callees, and finally decides what specialized extra versions
   should be created.  In the third, the special versions materialize and
   appropriate calls are redirected.

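   As an illustration (a made-up example, not one taken from this pass's
   sources), consider:

     static int mul (int x, int y) { return x * y; }
     int f (int a) { return mul (a, 2); }
     int g (int b) { return mul (b, 2); }

   Every call passes the constant 2 as the second argument, so the pass can
   create a clone along the lines of mul.constprop (int x), specialized for
   y == 2, and redirect both calls to it, after which the multiplication can
   be strength-reduced to a shift.
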
   The algorithm used is to a certain extent based on "Interprocedural
   Constant Propagation", by David Callahan, Keith D Cooper, Ken Kennedy,
   Linda Torczon, Comp86, pg 152-161 and "A Methodology for Procedure
   Cloning" by Keith D Cooper, Mary W. Hall, and Ken Kennedy.

   First stage - intraprocedural analysis
   =======================================

   This phase computes jump_function and modification flags.

   A jump function for a call-site represents the values passed as actual
   arguments of a given call-site.  In principle, there are three types of
   values:

   Pass through - the caller's formal parameter is passed as an actual
                  argument, plus an operation on it can be performed.
   Constant - a constant is passed as an actual argument.
   Unknown - neither of the above.

   All jump function types are described in detail in ipa-prop.h, together
   with the data structures that represent them and methods of accessing
   them.

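   For illustration, with a hypothetical caller (again not taken from real
   input):

     extern int rand (void);
     void callee (int a, int b, int c);

     void caller (int x)
     {
       callee (x + 4, 7, rand ());
     }

   the jump function for A would be a pass-through of parameter X with a
   PLUS_EXPR operation, the one for B would be the constant 7 and the one for
   C would be unknown.
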
   ipcp_generate_summary() is the main function of the first stage.

   Second stage - interprocedural analysis
   ========================================

   This stage is itself divided into two phases.  In the first, we propagate
   known values over the call graph, in the second, we make cloning decisions.
   It uses a different algorithm than the one in Callahan's original paper.

   First, we traverse the functions topologically from callers to callees and,
   for each strongly connected component (SCC), we propagate constants
   according to previously computed jump functions.  We also record what known
   values depend on other known values and estimate local effects.  Finally,
   we propagate cumulative information about these effects from dependent
   values to those on which they depend.

   Second, we again traverse the call graph in the same topological order and
   make clones for functions which we know are called with the same values in
   all contexts and decide about extra specialized clones of functions just
   for some contexts - these decisions are based on both local estimates and
   cumulative estimates propagated from callees.

   ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
   second stage.

   Third phase - materialization of clones, call statement updates.
   ============================================

   This stage is currently performed by call graph code (mainly in
   cgraphunit.cc and tree-inline.cc) according to instructions inserted to the
   call graph by the second stage.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "predict.h"
#include "alloc-pool.h"
#include "tree-pass.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "gimple-fold.h"
#include "symbol-summary.h"
#include "tree-vrp.h"
#include "ipa-prop.h"
#include "tree-pretty-print.h"
#include "tree-inline.h"
#include "ipa-fnsummary.h"
#include "ipa-utils.h"
#include "tree-ssa-ccp.h"
#include "stringpool.h"
#include "attribs.h"
#include "dbgcnt.h"
#include "symtab-clones.h"

template <typename valtype> class ipcp_value;

/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
{
public:
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  cgraph_edge *cs;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the ipcp_value of the caller from which the
     described value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted into this value was a pass-through or
     an ancestor, this is the index of the parameter of the caller the jump
     function references.  */
  int index;
};

/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
{
public:
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring
     about in its callees.  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  */
  int local_size_cost;
  /* Size cost that specializing the function for this value can bring about
     in its callees.  */
  int prop_size_cost;

  ipcp_value_base ()
    : local_time_benefit (0), prop_time_benefit (0),
      local_size_cost (0), prop_size_cost (0) {}
};

/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
{
public:
  /* The actual value for the given parameter.  */
  valtype value;
  /* The list of sources from which this value originates.  */
  ipcp_value_source <valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     of values.  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created.  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  */
  int dfs = 0;
  int low_link = 0;
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  */
  int scc_no = 0;
  /* Non-zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two self-recursive value generation chains, it is the
     maximum.  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
                   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
  {
    return o->scc_no == scc_no;
  }

  /* Return true if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice.  */

  bool self_recursion_generated_p ()
  {
    return self_recursion_generated_level > 0;
  }
};

/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

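/* For instance (an illustrative walk-through of the representation above):
   a freshly initialized scalar lattice with values == NULL, values_count == 0
   and both flags cleared is TOP; after set_contains_variable () it is neither
   TOP nor BOTTOM; after set_to_bottom () it is BOTTOM and its other fields
   are no longer meaningful.  */
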
template <typename valtype>
struct ipcp_lattice
{
public:
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  */
  int values_count;
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  */
  bool bottom;

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
                  ipcp_value<valtype> *src_val = NULL,
                  int src_idx = 0, HOST_WIDE_INT offset = -1,
                  ipcp_value<valtype> **val_p = NULL,
                  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
};

/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
{
public:
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  */
  HOST_WIDE_INT size;
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
};

/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.
   For eg:
   int f(int x)
   {
     return some_op (x);
   }

   int f1(int y)
   {
     if (cond)
       return f (y & 0xff);
     else
       return f (y & 0xf);
   }

   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

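/* Continuing the example above (illustrative arithmetic only): the lattice
   for 'x' would end up holding m_value == 0 and m_mask == 0xff, so
   m_value & ~m_mask is zero in every known bit position, i.e. all bits above
   the low eight are known to be zero.  */
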
class ipcp_bits_lattice
{
public:
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
                  enum tree_code, tree, bool);

  bool meet_with (widest_int, widest_int, unsigned);

  void print (FILE *);

private:
  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
};

/* Lattice of value ranges.  */

class ipcp_vr_lattice
{
public:
  value_range m_vr;

  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

private:
  bool meet_with_1 (const value_range *other_vr);
};

/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
{
public:
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  */
  int aggs_count;
  /* True if aggregate data were passed by reference (as opposed to by
     value).  */
  bool aggs_by_ref;
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  */
  bool aggs_bottom;

  /* There is a virtual call based on this parameter.  */
  bool virt_call;
};

/* Allocation pools for values and their sources in ipa-cp.  */

object_allocator<ipcp_value<tree> > ipcp_cst_values_pool
  ("IPA-CP constant values");

object_allocator<ipcp_value<ipa_polymorphic_call_context> >
  ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");

object_allocator<ipcp_value_source<tree> > ipcp_sources_pool
  ("IPA-CP value sources");

object_allocator<ipcp_agg_lattice> ipcp_agg_lattice_pool
  ("IPA_CP aggregate lattices");

/* Base count to use in heuristics when using profile feedback.  */

static profile_count base_count;

/* Original overall size of the program.  */

static long overall_size, orig_overall_size;

/* Node name to unique clone suffix number map.  */
static hash_map<const char *, unsigned> *clone_num_suffixes;

/* Return the param lattices structure corresponding to the Ith formal
   parameter of the function described by INFO.  */

static inline class ipcp_param_lattices *
ipa_get_parm_lattices (class ipa_node_params *info, int i)
{
  gcc_assert (i >= 0 && i < ipa_get_param_count (info));
  gcc_checking_assert (!info->ipcp_orig_node);
  gcc_checking_assert (info->lattices);
  return &(info->lattices[i]);
}

/* Return the lattice corresponding to the scalar value of the Ith formal
   parameter of the function described by INFO.  */

static inline ipcp_lattice<tree> *
ipa_get_scalar_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->itself;
}

/* Return the lattice corresponding to the polymorphic call context of the Ith
   formal parameter of the function described by INFO.  */

static inline ipcp_lattice<ipa_polymorphic_call_context> *
ipa_get_poly_ctx_lat (class ipa_node_params *info, int i)
{
  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
  return &plats->ctxlat;
}

/* Return whether LAT is a lattice with a single constant and without an
   undefined value.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::is_single_const ()
{
  if (bottom || contains_variable || values_count != 1)
    return false;
  else
    return true;
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, tree v)
{
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
    {
      fprintf (f, "& ");
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
    }
  else
    print_generic_expr (f, v);
}

/* Print V which is extracted from a value in a lattice to F.  */

static void
print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
{
  v.dump (f, false);
}

/* Print a lattice LAT to F.  */

template <typename valtype>
void
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
{
  ipcp_value<valtype> *val;
  bool prev = false;

  if (bottom)
    {
      fprintf (f, "BOTTOM\n");
      return;
    }

  if (!values_count && !contains_variable)
    {
      fprintf (f, "TOP\n");
      return;
    }

  if (contains_variable)
    {
      fprintf (f, "VARIABLE");
      prev = true;
      if (dump_benefits)
        fprintf (f, "\n");
    }

  for (val = values; val; val = val->next)
    {
      if (dump_benefits && prev)
        fprintf (f, " ");
      else if (!dump_benefits && prev)
        fprintf (f, ", ");
      else
        prev = true;

      print_ipcp_constant_value (f, val->value);

      if (dump_sources)
        {
          ipcp_value_source<valtype> *s;

          if (val->self_recursion_generated_p ())
            fprintf (f, " [self_gen(%i), from:",
                     val->self_recursion_generated_level);
          else
            fprintf (f, " [scc: %i, from:", val->scc_no);
          for (s = val->sources; s; s = s->next)
            fprintf (f, " %i(%f)", s->cs->caller->order,
                     s->cs->sreal_frequency ().to_double ());
          fprintf (f, "]");
        }

      if (dump_benefits)
        fprintf (f, " [loc_time: %g, loc_size: %i, "
                 "prop_time: %g, prop_size: %i]\n",
                 val->local_time_benefit.to_double (), val->local_size_cost,
                 val->prop_time_benefit.to_double (), val->prop_size_cost);
    }
  if (!dump_benefits)
    fprintf (f, "\n");
}

/* Print the bits lattice to F.  */

void
ipcp_bits_lattice::print (FILE *f)
{
  if (top_p ())
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
  else
    {
      fprintf (f, " Bits: value = ");
      print_hex (get_value (), f);
      fprintf (f, ", mask = ");
      print_hex (get_mask (), f);
      fprintf (f, "\n");
    }
}

/* Print value range lattice to F.  */

void
ipcp_vr_lattice::print (FILE * f)
{
  dump_value_range (f, &m_vr);
}

/* Print all ipcp_lattices of all functions to F.  */

static void
print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
{
  struct cgraph_node *node;
  int i, count;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
         lattices for them.  */
      if (!info || info->ipcp_orig_node)
        continue;
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      for (i = 0; i < count; i++)
        {
          struct ipcp_agg_lattice *aglat;
          class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
          fprintf (f, " param [%d]: ", i);
          plats->itself.print (f, dump_sources, dump_benefits);
          fprintf (f, " ctxs: ");
          plats->ctxlat.print (f, dump_sources, dump_benefits);
          plats->bits_lattice.print (f);
          fprintf (f, " ");
          plats->m_value_range.print (f);
          fprintf (f, "\n");
          if (plats->virt_call)
            fprintf (f, " virt_call flag set\n");

          if (plats->aggs_bottom)
            {
              fprintf (f, " AGGS BOTTOM\n");
              continue;
            }
          if (plats->aggs_contain_variable)
            fprintf (f, " AGGS VARIABLE\n");
          for (aglat = plats->aggs; aglat; aglat = aglat->next)
            {
              fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
                       plats->aggs_by_ref ? "ref " : "", aglat->offset);
              aglat->print (f, dump_sources, dump_benefits);
            }
        }
    }
}

/* Determine whether it is at all technically possible to create clones of
   NODE and store this information in the ipa_node_params structure associated
   with NODE.  */

static void
determine_versionability (struct cgraph_node *node,
                          class ipa_node_params *info)
{
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present.  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
           || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the SIMD clones themselves and create
         vector copies of them, so IPA-cp and SIMD clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
    }
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
    {
      /* Ideally we should clone the target clones themselves and create
         copies of them, so IPA-cp and target clones can happily
         coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
    }
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
    {
      /* TODO: call is versionable if we make sure that all
         callers are inside of a comdat group.  */
      reason = "calls comdat-local function";
    }

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
         edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
        {
          if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
            reason = "external function which calls va_arg_pack";
          if (DECL_FUNCTION_CODE (edge->callee->decl)
              == BUILT_IN_VA_ARG_PACK_LEN)
            reason = "external function which calls va_arg_pack_len";
        }

  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
             node->dump_name (), reason);

  info->versionable = (reason == NULL);
}

/* Return true if it is at all technically possible to create clones of a
   NODE.  */

static bool
ipcp_versionable_function_p (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  return info && info->versionable;
}

/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
{
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  */
  sreal freq_sum;
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls.  */
  int n_nonrec_calls;
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  */
  cgraph_node *itself;
};

/* Initialize fields of STATS to zeroes and optionally set it up so that
   calls from ITSELF are summed into rec_count_sum rather than count_sum.  */

static inline void
init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
{
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_calls = 0;
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->freq_sum = 0;
  stats->itself = itself;
}

/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

static bool
gather_caller_stats (struct cgraph_node *node, void *data)
{
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
      {
        ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
        if (info && info->node_dead)
          continue;

        if (cs->count.ipa ().initialized_p ())
          {
            if (stats->itself && stats->itself == cs->caller)
              stats->rec_count_sum += cs->count.ipa ();
            else
              stats->count_sum += cs->count.ipa ();
          }
        stats->freq_sum += cs->sreal_frequency ();
        stats->n_calls++;
        if (stats->itself && stats->itself != cs->caller)
          stats->n_nonrec_calls++;

        if (cs->maybe_hot_p ())
          stats->n_hot_calls++;
      }
  return false;
}

/* Return true if this NODE is a viable candidate for cloning.  */

static bool
ipcp_cloning_candidate_p (struct cgraph_node *node)
{
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "-fipa-cp-clone disabled.\n",
                 node->dump_name ());
      return false;
    }

  if (node->optimize_for_size_p ())
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; "
                 "optimizing it for size.\n",
                 node->dump_name ());
      return false;
    }

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",
                 node->dump_name ());
      return true;
    }

  /* When profile is available and function is hot, propagate into it even if
     calls seem cold; constant propagation can improve the function's speed
     significantly.  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
    {
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
        {
          if (dump_file)
            fprintf (dump_file, "Considering %s for cloning; "
                     "usually called directly.\n",
                     node->dump_name ());
          return true;
        }
    }
  if (!stats.n_hot_calls)
    {
      if (dump_file)
        fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
                 node->dump_name ());
      return false;
    }
  if (dump_file)
    fprintf (dump_file, "Considering %s for cloning.\n",
             node->dump_name ());
  return true;
}

template <typename valtype>
class value_topo_info
{
public:
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  */
  int dfs_counter;

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  {}
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
};

/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order.  */

class ipa_topo_info
{
public:
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
                     constants ()
  {}
};

/* Skip edges from and to nodes without ipa_cp enabled.
   Ignore not available symbols.  */

static bool
ignore_edge_p (cgraph_edge *e)
{
  enum availability avail;
  cgraph_node *ultimate_target
    = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller);

  return (avail <= AVAIL_INTERPOSABLE
          || !opt_for_fn (ultimate_target->decl, optimize)
          || !opt_for_fn (ultimate_target->decl, flag_ipa_cp));
}

/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

static void
build_toporder_info (class ipa_topo_info *topo)
{
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
                                        ignore_edge_p);
}

/* Free information about strongly connected components and the arrays in
   TOPO.  */

static void
free_toporder_info (class ipa_topo_info *topo)
{
  ipa_free_postorder_info ();
  free (topo->order);
  free (topo->stack);
}

/* Add NODE to the stack in TOPO, unless it is already there.  */

static inline void
push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
    return;
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
}

/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty.  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
{
  if (topo->stack_top)
    {
      struct cgraph_node *node;
      topo->stack_top--;
      node = topo->stack[topo->stack_top];
      ipa_node_params_sum->get (node)->node_enqueued = 0;
      return node;
    }
  else
    return NULL;
}

/* Set lattice LAT to bottom and return true if it previously was not set as
   such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_to_bottom ()
{
  bool ret = !bottom;
  bottom = true;
  return ret;
}

/* Mark lattice as containing an unknown value and return true if it
   previously was not marked as such.  */

template <typename valtype>
inline bool
ipcp_lattice<valtype>::set_contains_variable ()
{
  bool ret = !contains_variable;
  contains_variable = true;
  return ret;
}

/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

static inline bool
set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
  return ret;
}

/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

static inline bool
set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
{
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
  return ret;
}

/* Meet the current value of the lattice with the lattice OTHER.  */

bool
ipcp_vr_lattice::meet_with (const ipcp_vr_lattice &other)
{
  return meet_with_1 (&other.m_vr);
}

/* Meet the current value of the lattice with the value range described by
   P_VR.  */

bool
ipcp_vr_lattice::meet_with (const value_range *p_vr)
{
  return meet_with_1 (p_vr);
}

/* Meet the current value of the lattice with the value range described by
   OTHER_VR.  Return TRUE if anything changed.  */

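/* For instance (illustrative values): meeting a lattice that currently holds
   the range [1, 5] with [10, 20] unions the two into a range covering both
   (conservatively [1, 20] if only a single sub-range can be stored), whereas
   meeting with a VARYING range drops the lattice to BOTTOM.  */
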
bool
ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
{
  if (bottom_p ())
    return false;

  if (other_vr->varying_p ())
    return set_to_bottom ();

  value_range save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
}

/* Return true if value range information in the lattice is yet unknown.  */

bool
ipcp_vr_lattice::top_p () const
{
  return m_vr.undefined_p ();
}

/* Return true if value range information in the lattice is known to be
   unusable.  */

bool
ipcp_vr_lattice::bottom_p () const
{
  return m_vr.varying_p ();
}

/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

bool
ipcp_vr_lattice::set_to_bottom ()
{
  if (m_vr.varying_p ())
    return false;
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate.  */
  m_vr.set_varying (integer_type_node);
  return true;
}

/* Set lattice value to bottom, if it already isn't the case.  */

bool
ipcp_bits_lattice::set_to_bottom ()
{
  if (bottom_p ())
    return false;
  m_lattice_val = IPA_BITS_VARYING;
  m_value = 0;
  m_mask = -1;
  return true;
}

/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

bool
ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
{
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  m_value = wi::bit_and (wi::bit_not (mask), value);
  m_mask = mask;
  return true;
}

/* Return true if any of the known bits are non-zero.  */

bool
ipcp_bits_lattice::known_nonzero_p () const
{
  if (!constant_p ())
    return false;
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
}

/* Convert operand to value, mask form.  */

void
ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
                                       widest_int *maskp)
{
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
    {
      *valuep = wi::to_widest (operand);
      *maskp = 0;
    }
  else
    {
      *valuep = 0;
      *maskp = -1;
    }
}

/* Meet operation, similar to ccp_lattice_meet: we xor the values, so if
   this->m_value and VALUE differ at a bit position where both are known, that
   bit is dropped to varying by setting it in the mask.  Return true if the
   mask changed.  This function assumes that the lattice value is in CONSTANT
   state.  If DROP_ALL_ONES, mask out any known bits with value one
   afterwards.  */

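/* A worked example (hypothetical numbers, 8 bits of precision): meeting a
   lattice holding {m_value = 0x12, m_mask = 0x0c} with {value = 0x10,
   mask = 0x03} gives m_mask = (0x0c | 0x03) | (0x12 ^ 0x10) = 0x0f and then
   m_value = 0x12 & ~0x0f = 0x10, i.e. only the bits above the low nibble
   remain known.  */
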
bool
ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
                                unsigned precision, bool drop_all_ones)
{
  gcc_assert (constant_p ());

  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);
  if (drop_all_ones)
    m_mask |= m_value;
  m_value &= ~m_mask;

  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
}

/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision>.  */

bool
ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
                              unsigned precision)
{
  if (bottom_p ())
    return false;

  if (top_p ())
    {
      if (wi::sext (mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (value, mask);
    }

  return meet_with_1 (value, mask, precision, false);
}

/* Meet bits lattice with the result of bit_value_binop (other, operand)
   if code is binary operation or bit_value_unop (other) if code is unary op.
   In the case when code is nop_expr, no adjustment is required.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

bool
ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, unsigned precision,
                              signop sgn, enum tree_code code, tree operand,
                              bool drop_all_ones)
{
  if (other.bottom_p ())
    return set_to_bottom ();

  if (bottom_p () || other.top_p ())
    return false;

  widest_int adjusted_value, adjusted_mask;

  if (TREE_CODE_CLASS (code) == tcc_binary)
    {
      tree type = TREE_TYPE (operand);
      widest_int o_value, o_mask;
      get_value_and_mask (operand, &o_value, &o_mask);

      bit_value_binop (code, sgn, precision, &adjusted_value, &adjusted_mask,
                       sgn, precision, other.get_value (), other.get_mask (),
                       TYPE_SIGN (type), TYPE_PRECISION (type), o_value,
                       o_mask);

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else if (TREE_CODE_CLASS (code) == tcc_unary)
    {
      bit_value_unop (code, sgn, precision, &adjusted_value,
                      &adjusted_mask, sgn, precision, other.get_value (),
                      other.get_mask ());

      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
    }

  else
    return set_to_bottom ();

  if (top_p ())
    {
      if (drop_all_ones)
        {
          adjusted_mask |= adjusted_value;
          adjusted_value &= ~adjusted_mask;
        }
      if (wi::sext (adjusted_mask, precision) == -1)
        return set_to_bottom ();
      return set_to_constant (adjusted_value, adjusted_mask);
    }
  else
    return meet_with_1 (adjusted_value, adjusted_mask, precision,
                        drop_all_ones);
}

/* Mark both aggregate and scalar lattices as containing an unknown variable,
   return true if any of them has not been marked as such so far.  */

static inline bool
set_all_contains_variable (class ipcp_param_lattices *plats)
{
  bool ret;
  ret = plats->itself.set_contains_variable ();
  ret |= plats->ctxlat.set_contains_variable ();
  ret |= set_agg_lats_contain_variable (plats);
  ret |= plats->bits_lattice.set_to_bottom ();
  ret |= plats->m_value_range.set_to_bottom ();
  return ret;
}

/* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
   points to by the number of callers to NODE.  */

static bool
count_callers (cgraph_node *node, void *data)
{
  int *caller_count = (int *) data;

  for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
    /* Local thunks can be handled transparently, but if the thunk cannot
       be optimized out, count it as a real use.  */
    if (!cs->caller->thunk || !cs->caller->local)
      ++*caller_count;
  return false;
}

/* Worker of call_for_symbol_thunks_and_aliases.  It is supposed to be called
   on the one caller of some other node.  Set the caller's corresponding
   flag.  */

static bool
set_single_call_flag (cgraph_node *node, void *)
{
  cgraph_edge *cs = node->callers;
  /* Local thunks can be handled transparently, skip them.  */
  while (cs && cs->caller->thunk && cs->caller->local)
    cs = cs->next_caller;
  if (cs)
    if (ipa_node_params* info = ipa_node_params_sum->get (cs->caller))
      {
        info->node_calling_single_call = true;
        return true;
      }
  return false;
}

/* Initialize ipcp_lattices.  */

static void
initialize_node_lattices (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  struct cgraph_edge *ie;
  bool disable = false, variable = false;
  int i;

  gcc_checking_assert (node->has_gimple_body_p ());

  if (!ipa_get_param_count (info))
    disable = true;
  else if (node->local)
    {
      int caller_count = 0;
      node->call_for_symbol_thunks_and_aliases (count_callers, &caller_count,
                                                true);
      gcc_checking_assert (caller_count > 0);
      if (caller_count == 1)
        node->call_for_symbol_thunks_and_aliases (set_single_call_flag,
                                                  NULL, true);
    }
  else
    {
      /* When cloning is allowed, we can assume that externally visible
         functions are not called.  We will compensate this by cloning
         later.  */
      if (ipcp_versionable_function_p (node)
          && ipcp_cloning_candidate_p (node))
        variable = true;
      else
        disable = true;
    }

  if (dump_file && (dump_flags & TDF_DETAILS)
      && !node->alias && !node->thunk)
    {
      fprintf (dump_file, "Initializing lattices of %s\n",
               node->dump_name ());
      if (disable || variable)
        fprintf (dump_file, " Marking all lattices as %s\n",
                 disable ? "BOTTOM" : "VARIABLE");
    }

  auto_vec<bool, 16> surviving_params;
  bool pre_modified = false;

  clone_info *cinfo = clone_info::get (node);

  if (!disable && cinfo && cinfo->param_adjustments)
    {
      /* At the moment all IPA optimizations should use the number of
         parameters of the prevailing decl as the m_always_copy_start.
         Handling any other value would complicate the code below, so for the
         time being let's only assert it is so.  */
      gcc_assert ((cinfo->param_adjustments->m_always_copy_start
                   == ipa_get_param_count (info))
                  || cinfo->param_adjustments->m_always_copy_start < 0);

      pre_modified = true;
      cinfo->param_adjustments->get_surviving_params (&surviving_params);

      if (dump_file && (dump_flags & TDF_DETAILS)
          && !node->alias && !node->thunk)
        {
          bool first = true;
          for (int j = 0; j < ipa_get_param_count (info); j++)
            {
              if (j < (int) surviving_params.length ()
                  && surviving_params[j])
                continue;
              if (first)
                {
                  fprintf (dump_file,
                           " The following parameters are dead on arrival:");
                  first = false;
                }
              fprintf (dump_file, " %u", j);
            }
          if (!first)
            fprintf (dump_file, "\n");
        }
    }

  for (i = 0; i < ipa_get_param_count (info); i++)
    {
      ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      if (disable
          || !ipa_get_type (info, i)
          || (pre_modified && (surviving_params.length () <= (unsigned) i
                               || !surviving_params[i])))
        {
          plats->itself.set_to_bottom ();
          plats->ctxlat.set_to_bottom ();
          set_agg_lats_to_bottom (plats);
          plats->bits_lattice.set_to_bottom ();
          plats->m_value_range.m_vr = value_range ();
          plats->m_value_range.set_to_bottom ();
        }
      else
        {
          plats->m_value_range.init ();
          if (variable)
            set_all_contains_variable (plats);
        }
    }

  for (ie = node->indirect_calls; ie; ie = ie->next_callee)
    if (ie->indirect_info->polymorphic
        && ie->indirect_info->param_index >= 0)
      {
        gcc_checking_assert (ie->indirect_info->param_index >= 0);
        ipa_get_parm_lattices (info,
                               ie->indirect_info->param_index)->virt_call = 1;
      }
}

/* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
   PARAM_TYPE.  */

static bool
ipacp_value_safe_for_type (tree param_type, tree value)
{
  tree val_type = TREE_TYPE (value);
  if (param_type == val_type
      || useless_type_conversion_p (param_type, val_type)
      || fold_convertible_p (param_type, value))
    return true;
  else
    return false;
}

/* Return true iff X and Y should be considered equal values by IPA-CP.  */

static bool
values_equal_for_ipcp_p (tree x, tree y)
{
  gcc_checking_assert (x != NULL_TREE && y != NULL_TREE);

  if (x == y)
    return true;

  if (TREE_CODE (x) == ADDR_EXPR
      && TREE_CODE (y) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (x, 0)) == CONST_DECL
      && TREE_CODE (TREE_OPERAND (y, 0)) == CONST_DECL)
    return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x, 0)),
                            DECL_INITIAL (TREE_OPERAND (y, 0)), 0);
  else
    return operand_equal_p (x, y, 0);
}

/* Return the result of a (possibly arithmetic) operation on the constant
   value INPUT.  OPERAND is 2nd operand for binary operation.  RES_TYPE is
   the type of the parameter to which the result is passed.  Return
   NULL_TREE if that cannot be determined or be considered an
   interprocedural invariant.  */

static tree
ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand,
                         tree res_type)
{
  tree res;

  if (opcode == NOP_EXPR)
    return input;
  if (!is_gimple_ip_invariant (input))
    return NULL_TREE;

  if (opcode == ASSERT_EXPR)
    {
      if (values_equal_for_ipcp_p (input, operand))
        return input;
      else
        return NULL_TREE;
    }

  if (!res_type)
    {
      if (TREE_CODE_CLASS (opcode) == tcc_comparison)
        res_type = boolean_type_node;
      else if (expr_type_first_operand_type_p (opcode))
        res_type = TREE_TYPE (input);
      else
        return NULL_TREE;
    }

  if (TREE_CODE_CLASS (opcode) == tcc_unary)
    res = fold_unary (opcode, res_type, input);
  else
    res = fold_binary (opcode, res_type, input, operand);

  if (res && !is_gimple_ip_invariant (res))
    return NULL_TREE;

  return res;
}

/* Return the result of a (possibly arithmetic) pass through jump function
   JFUNC on the constant value INPUT.  RES_TYPE is the type of the parameter
   to which the result is passed.  Return NULL_TREE if that cannot be
   determined or be considered an interprocedural invariant.  */

static tree
ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input,
                                tree res_type)
{
  return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc),
                                  input,
                                  ipa_get_jf_pass_through_operand (jfunc),
                                  res_type);
}

/* Return the result of an ancestor jump function JFUNC on the constant value
   INPUT.  Return NULL_TREE if that cannot be determined.  */

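/* For example (a hypothetical invariant, not taken from this file): for
   INPUT == &some_var and an ancestor offset of 32 bits, the function below
   builds the equivalent of &MEM_REF <&some_var + 4B>, i.e. the address of
   the sub-object at byte offset 4.  */
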
static tree
ipa_get_jf_ancestor_result (struct ipa_jump_func *jfunc, tree input)
{
  gcc_checking_assert (TREE_CODE (input) != TREE_BINFO);
  if (TREE_CODE (input) == ADDR_EXPR)
    {
      gcc_checking_assert (is_gimple_ip_invariant_address (input));
      poly_int64 off = ipa_get_jf_ancestor_offset (jfunc);
      if (known_eq (off, 0))
        return input;
      poly_int64 byte_offset = exact_div (off, BITS_PER_UNIT);
      return build1 (ADDR_EXPR, TREE_TYPE (input),
                     fold_build2 (MEM_REF, TREE_TYPE (TREE_TYPE (input)), input,
                                  build_int_cst (ptr_type_node, byte_offset)));
    }
  else if (ipa_get_jf_ancestor_keep_null (jfunc)
           && zerop (input))
    return input;
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to a single known constant value and if
   so, return it.  Otherwise return NULL.  INFO describes the caller node or
   the one it is inlined to, so that pass-through jump functions can be
   evaluated.  PARM_TYPE is the type of the parameter to which the result is
   passed.  */

tree
ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc,
                      tree parm_type)
{
  if (jfunc->type == IPA_JF_CONST)
    return ipa_get_jf_constant (jfunc);
  else if (jfunc->type == IPA_JF_PASS_THROUGH
           || jfunc->type == IPA_JF_ANCESTOR)
    {
      tree input;
      int idx;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        idx = ipa_get_jf_pass_through_formal_id (jfunc);
      else
        idx = ipa_get_jf_ancestor_formal_id (jfunc);

      if (info->ipcp_orig_node)
        input = info->known_csts[idx];
      else
        {
          ipcp_lattice<tree> *lat;

          if (!info->lattices
              || idx >= ipa_get_param_count (info))
            return NULL_TREE;
          lat = ipa_get_scalar_lat (info, idx);
          if (!lat->is_single_const ())
            return NULL_TREE;
          input = lat->values->value;
        }

      if (!input)
        return NULL_TREE;

      if (jfunc->type == IPA_JF_PASS_THROUGH)
        return ipa_get_jf_pass_through_result (jfunc, input, parm_type);
      else
        return ipa_get_jf_ancestor_result (jfunc, input);
    }
  else
    return NULL_TREE;
}

/* Determine whether JFUNC evaluates to single known polymorphic context, given
   that INFO describes the caller node or the one it is inlined to, CS is the
   call graph edge corresponding to JFUNC and CSIDX index of the described
   parameter.  */

ipa_polymorphic_call_context
ipa_context_from_jfunc (ipa_node_params *info, cgraph_edge *cs, int csidx,
                        ipa_jump_func *jfunc)
{
  ipa_edge_args *args = ipa_edge_args_sum->get (cs);
  ipa_polymorphic_call_context ctx;
  ipa_polymorphic_call_context *edge_ctx
    = cs ? ipa_get_ith_polymorhic_call_context (args, csidx) : NULL;

  if (edge_ctx && !edge_ctx->useless_p ())
    ctx = *edge_ctx;

  if (jfunc->type == IPA_JF_PASS_THROUGH
      || jfunc->type == IPA_JF_ANCESTOR)
    {
      ipa_polymorphic_call_context srcctx;
      int srcidx;
      bool type_preserved = true;
      if (jfunc->type == IPA_JF_PASS_THROUGH)
        {
          if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
            return ctx;
          type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
          srcidx = ipa_get_jf_pass_through_formal_id (jfunc);
        }
      else
        {
          type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
          srcidx = ipa_get_jf_ancestor_formal_id (jfunc);
        }
      if (info->ipcp_orig_node)
        {
          if (info->known_contexts.exists ())
            srcctx = info->known_contexts[srcidx];
        }
      else
        {
          if (!info->lattices
              || srcidx >= ipa_get_param_count (info))
            return ctx;
          ipcp_lattice<ipa_polymorphic_call_context> *lat;
          lat = ipa_get_poly_ctx_lat (info, srcidx);
          if (!lat->is_single_const ())
            return ctx;
          srcctx = lat->values->value;
        }
      if (srcctx.useless_p ())
        return ctx;
      if (jfunc->type == IPA_JF_ANCESTOR)
        srcctx.offset_by (ipa_get_jf_ancestor_offset (jfunc));
      if (!type_preserved)
        srcctx.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
      srcctx.combine_with (ctx);
      return srcctx;
    }

  return ctx;
}

/* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
   DST_TYPE on value range in SRC_VR and store it to DST_VR.  Return true if
   the result is a range or an anti-range.  */

static bool
ipa_vr_operation_and_type_effects (value_range *dst_vr,
                                   value_range *src_vr,
                                   enum tree_code operation,
                                   tree dst_type, tree src_type)
{
  range_fold_unary_expr (dst_vr, operation, dst_type, src_vr, src_type);
  if (dst_vr->varying_p () || dst_vr->undefined_p ())
    return false;
  return true;
}

/* Determine value_range of JFUNC given that INFO describes the caller node or
   the one it is inlined to, CS is the call graph edge corresponding to JFUNC
   and PARM_TYPE of the parameter.  */

value_range
ipa_value_range_from_jfunc (ipa_node_params *info, cgraph_edge *cs,
                            ipa_jump_func *jfunc, tree parm_type)
{
  value_range vr;
  if (jfunc->m_vr)
    ipa_vr_operation_and_type_effects (&vr,
                                       jfunc->m_vr,
                                       NOP_EXPR, parm_type,
                                       jfunc->m_vr->type ());
  if (vr.singleton_p ())
    return vr;
  if (jfunc->type == IPA_JF_PASS_THROUGH)
    {
      int idx;
      ipcp_transformation *sum
        = ipcp_get_transformation_summary (cs->caller->inlined_to
                                           ? cs->caller->inlined_to
                                           : cs->caller);
      if (!sum || !sum->m_vr)
        return vr;

      idx = ipa_get_jf_pass_through_formal_id (jfunc);

      if (!(*sum->m_vr)[idx].known)
        return vr;
      tree vr_type = ipa_get_type (info, idx);
      value_range srcvr (wide_int_to_tree (vr_type, (*sum->m_vr)[idx].min),
                         wide_int_to_tree (vr_type, (*sum->m_vr)[idx].max),
                         (*sum->m_vr)[idx].type);

      enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);

      if (TREE_CODE_CLASS (operation) == tcc_unary)
        {
          value_range res;

          if (ipa_vr_operation_and_type_effects (&res,
                                                 &srcvr,
                                                 operation, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
      else
        {
          value_range op_res, res;
          tree op = ipa_get_jf_pass_through_operand (jfunc);
          value_range op_vr (op, op);

          range_fold_binary_expr (&op_res, operation, vr_type, &srcvr, &op_vr);
          if (ipa_vr_operation_and_type_effects (&res,
                                                 &op_res,
                                                 NOP_EXPR, parm_type,
                                                 vr_type))
            vr.intersect (res);
        }
    }
  return vr;
}

/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
   parameter with the given INDEX.  */

static tree
get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
                     int index)
{
  struct ipa_agg_replacement_value *aggval;

  aggval = ipa_get_agg_replacements_for_node (node);
  while (aggval)
    {
      if (aggval->offset == offset
          && aggval->index == index)
        return aggval->value;
      aggval = aggval->next;
    }
  return NULL_TREE;
}

/* Determine whether ITEM, a jump function for an aggregate part, evaluates to
   a single known constant value and if so, return it.  Otherwise return NULL.
   NODE and INFO describe the caller node or the one it is inlined to, and its
   related info.  */

static tree
ipa_agg_value_from_node (class ipa_node_params *info,
                         struct cgraph_node *node,
                         struct ipa_agg_jf_item *item)
{
  tree value = NULL_TREE;
  int src_idx;

  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
    return NULL_TREE;

  if (item->jftype == IPA_JF_CONST)
    return item->value.constant;

  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
                       || item->jftype == IPA_JF_LOAD_AGG);

  src_idx = item->value.pass_through.formal_id;

  if (info->ipcp_orig_node)
    {
      if (item->jftype == IPA_JF_PASS_THROUGH)
        value = info->known_csts[src_idx];
      else
        value = get_clone_agg_value (node, item->value.load_agg.offset,
                                     src_idx);
    }
  else if (info->lattices)
    {
      class ipcp_param_lattices *src_plats
        = ipa_get_parm_lattices (info, src_idx);

      if (item->jftype == IPA_JF_PASS_THROUGH)
        {
          struct ipcp_lattice<tree> *lat = &src_plats->itself;

          if (!lat->is_single_const ())
            return NULL_TREE;

          value = lat->values->value;
        }
      else if (src_plats->aggs
               && !src_plats->aggs_bottom
               && !src_plats->aggs_contain_variable
               && src_plats->aggs_by_ref == item->value.load_agg.by_ref)
        {
          struct ipcp_agg_lattice *aglat;

          for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
            {
              if (aglat->offset > item->value.load_agg.offset)
                break;

              if (aglat->offset == item->value.load_agg.offset)
                {
                  if (aglat->is_single_const ())
                    value = aglat->values->value;
                  break;
                }
            }
        }
    }

  if (!value)
    return NULL_TREE;

  if (item->jftype == IPA_JF_LOAD_AGG)
    {
      tree load_type = item->value.load_agg.type;
      tree value_type = TREE_TYPE (value);

      /* Ensure value type is compatible with load type.  */
      if (!useless_type_conversion_p (load_type, value_type))
        return NULL_TREE;
    }

  return ipa_get_jf_arith_result (item->value.pass_through.operation,
                                  value,
                                  item->value.pass_through.operand,
                                  item->type);
}

/* Determine whether AGG_JFUNC evaluates to a set of known constant values for
   an aggregate and if so, return it.  Otherwise return an empty set.  NODE
   and INFO describe the caller node or the one it is inlined to, and its
   related info.  */

struct ipa_agg_value_set
ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node,
                              struct ipa_agg_jump_function *agg_jfunc)
{
  struct ipa_agg_value_set agg;
  struct ipa_agg_jf_item *item;
  int i;

  agg.items = vNULL;
  agg.by_ref = agg_jfunc->by_ref;

  FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item)
    {
      tree value = ipa_agg_value_from_node (info, node, item);

      if (value)
        {
          struct ipa_agg_value value_item;

          value_item.offset = item->offset;
          value_item.value = value;

          agg.items.safe_push (value_item);
        }
    }
  return agg;
}

/* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
   bottom, not containing a variable component and without any known value at
   the same time.  */

DEBUG_FUNCTION void
ipcp_verify_propagated_values (void)
{
  struct cgraph_node *node;

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    {
      ipa_node_params *info = ipa_node_params_sum->get (node);
      if (!opt_for_fn (node->decl, flag_ipa_cp)
          || !opt_for_fn (node->decl, optimize))
        continue;
      int i, count = ipa_get_param_count (info);

      for (i = 0; i < count; i++)
        {
          ipcp_lattice<tree> *lat = ipa_get_scalar_lat (info, i);

          if (!lat->bottom
              && !lat->contains_variable
              && lat->values_count == 0)
            {
              if (dump_file)
                {
                  symtab->dump (dump_file);
                  fprintf (dump_file, "\nIPA lattices after constant "
                           "propagation, before gcc_unreachable:\n");
                  print_all_lattices (dump_file, true, false);
                }

              gcc_unreachable ();
            }
        }
    }
}

/* Return true iff X and Y should be considered equal contexts by IPA-CP.  */

static bool
values_equal_for_ipcp_p (ipa_polymorphic_call_context x,
                         ipa_polymorphic_call_context y)
{
  return x.equal_to (y);
}

/* Add a new value source to the value represented by THIS, marking that a
   value comes from edge CS and (if the underlying jump function is a
   pass-through or an ancestor one) from a caller value SRC_VAL of a caller
   parameter described by SRC_INDEX.  OFFSET is negative if the source was the
   scalar value of the parameter itself, otherwise it is the offset within an
   aggregate.  */

template <typename valtype>
void
ipcp_value<valtype>::add_source (cgraph_edge *cs, ipcp_value *src_val,
                                 int src_idx, HOST_WIDE_INT offset)
{
  ipcp_value_source<valtype> *src;

  src = new (ipcp_sources_pool.allocate ()) ipcp_value_source<valtype>;
  src->offset = offset;
  src->cs = cs;
  src->val = src_val;
  src->index = src_idx;

  src->next = sources;
  sources = src;
}

1915 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1916 CST, its generation level to SAME_LAT_GEN_LEVEL, and clear all other fields. */
1918 static ipcp_value<tree> *
1919 allocate_and_init_ipcp_value (tree cst, unsigned same_lat_gen_level)
1921 ipcp_value<tree> *val;
1923 val = new (ipcp_cst_values_pool.allocate ()) ipcp_value<tree>();
1924 val->value = cst;
1925 val->self_recursion_generated_level = same_lat_gen_level;
1926 return val;
1929 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1930 value to CTX, its generation level to SAME_LAT_GEN_LEVEL, and clear all other fields. */
1932 static ipcp_value<ipa_polymorphic_call_context> *
1933 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx,
1934 unsigned same_lat_gen_level)
1936 ipcp_value<ipa_polymorphic_call_context> *val;
1938 val = new (ipcp_poly_ctx_values_pool.allocate ())
1939 ipcp_value<ipa_polymorphic_call_context>();
1940 val->value = ctx;
1941 val->self_recursion_generated_level = same_lat_gen_level;
1942 return val;
1945 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1946 SRC_VAL, SRC_INDEX and OFFSET are meant for add_source and have the same
1947 meaning. OFFSET of -1 means the source is scalar and not a part of an
1948 aggregate. If non-NULL, VAL_P records the address of the existing or newly
1949 added ipcp_value.
1951 If the value is generated for a self-recursive call as a result of an
1952 arithmetic pass-through jump-function acting on a value in the same lattice,
1953 SAME_LAT_GEN_LEVEL must be the length of such a chain, otherwise it must be
1954 zero. If it is non-zero, the PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
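/* As a purely illustrative example (hypothetical callers, not from any
   real unit), three call sites

     foo (1);  foo (2);  foo (3);

   lead to three add_value invocations on the lattice of foo's
   parameter, building the value list {1, 2, 3}, with each value
   remembering its originating edge as a source.  */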
1956 template <typename valtype>
1957 bool
1958 ipcp_lattice<valtype>::add_value (valtype newval, cgraph_edge *cs,
1959 ipcp_value<valtype> *src_val,
1960 int src_idx, HOST_WIDE_INT offset,
1961 ipcp_value<valtype> **val_p,
1962 unsigned same_lat_gen_level)
1964 ipcp_value<valtype> *val, *last_val = NULL;
1966 if (val_p)
1967 *val_p = NULL;
1969 if (bottom)
1970 return false;
1972 for (val = values; val; last_val = val, val = val->next)
1973 if (values_equal_for_ipcp_p (val->value, newval))
1975 if (val_p)
1976 *val_p = val;
1978 if (val->self_recursion_generated_level < same_lat_gen_level)
1979 val->self_recursion_generated_level = same_lat_gen_level;
1981 if (ipa_edge_within_scc (cs))
1983 ipcp_value_source<valtype> *s;
1984 for (s = val->sources; s; s = s->next)
1985 if (s->cs == cs && s->val == src_val)
1986 break;
1987 if (s)
1988 return false;
1991 val->add_source (cs, src_val, src_idx, offset);
1992 return false;
1995 if (!same_lat_gen_level && values_count == opt_for_fn (cs->caller->decl,
1996 param_ipa_cp_value_list_size))
1998 /* We can only free sources, not the values themselves, because sources
1999 of other values in this SCC might point to them. */
2000 for (val = values; val; val = val->next)
2002 while (val->sources)
2004 ipcp_value_source<valtype> *src = val->sources;
2005 val->sources = src->next;
2006 ipcp_sources_pool.remove ((ipcp_value_source<tree>*)src);
2009 values = NULL;
2010 return set_to_bottom ();
2013 values_count++;
2014 val = allocate_and_init_ipcp_value (newval, same_lat_gen_level);
2015 val->add_source (cs, src_val, src_idx, offset);
2016 val->next = NULL;
2018 /* Add the new value to the end of the value list, which can reduce the
2019 number of propagation-stage iterations for recursive functions. */
2020 if (last_val)
2021 last_val->next = val;
2022 else
2023 values = val;
2025 if (val_p)
2026 *val_p = val;
2028 return true;
2031 /* A helper function that returns the result of the operation specified by
2032 OPCODE on the value of SRC_VAL. If non-NULL, OPND1_TYPE is the expected type
2033 of the value of SRC_VAL. If the operation is binary, OPND2 is a constant
2034 value acting as its second operand. If non-NULL, RES_TYPE is the expected
2035 type of the result. */
2037 static tree
2038 get_val_across_arith_op (enum tree_code opcode,
2039 tree opnd1_type,
2040 tree opnd2,
2041 ipcp_value<tree> *src_val,
2042 tree res_type)
2044 tree opnd1 = src_val->value;
2046 /* Skip source values that are incompatible with the specified type. */
2047 if (opnd1_type
2048 && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1)))
2049 return NULL_TREE;
2051 return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type);
2054 /* Propagate values through an arithmetic transformation described by a jump
2055 function associated with edge CS, taking values from SRC_LAT and putting
2056 them into DEST_LAT. OPND1_TYPE is the expected type for the values in SRC_LAT.
2057 OPND2 is a constant value if the transformation is a binary operation.
2058 SRC_OFFSET specifies the offset in an aggregate if SRC_LAT describes a lattice
2059 of a part of the aggregate. SRC_IDX is the index of the source parameter.
2060 RES_TYPE is the type of the result being propagated. Return true if
2061 DEST_LAT changed. */
2063 static bool
2064 propagate_vals_across_arith_jfunc (cgraph_edge *cs,
2065 enum tree_code opcode,
2066 tree opnd1_type,
2067 tree opnd2,
2068 ipcp_lattice<tree> *src_lat,
2069 ipcp_lattice<tree> *dest_lat,
2070 HOST_WIDE_INT src_offset,
2071 int src_idx,
2072 tree res_type)
2074 ipcp_value<tree> *src_val;
2075 bool ret = false;
2077 /* Due to circular dependencies, propagating within an SCC through arithmetic
2078 transformation would create an infinite number of values. But for a
2079 self-feeding recursive function, we can allow propagation up to a limited
2080 count, which enables a simple kind of recursive function versioning.
2081 In other scenarios, we simply make the lattices bottom. */
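/* An illustrative sketch of the self-feeding case (a hypothetical
   function, not from any real unit):

     int f (int i)
     {
       if (i < n)
	 f (i + 1);
       ...
     }

   Starting from a known value i = 0, the arithmetic pass-through jump
   function i + 1 keeps generating 1, 2, 3, ... in the same lattice,
   which the code below caps at param_ipa_cp_max_recursive_depth.  */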
2082 if (opcode != NOP_EXPR && ipa_edge_within_scc (cs))
2084 int i;
2086 int max_recursive_depth = opt_for_fn (cs->caller->decl,
2087 param_ipa_cp_max_recursive_depth);
2088 if (src_lat != dest_lat || max_recursive_depth < 1)
2089 return dest_lat->set_contains_variable ();
2091 /* No benefit if recursive execution has a low probability. */
2092 if (cs->sreal_frequency () * 100
2093 <= ((sreal) 1) * opt_for_fn (cs->caller->decl,
2094 param_ipa_cp_min_recursive_probability))
2095 return dest_lat->set_contains_variable ();
2097 auto_vec<ipcp_value<tree> *, 8> val_seeds;
2099 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2101 /* We currently do not use self-recursively generated values as
2102 propagation sources. This is overly conservative, but it avoids an
2103 explosion of the lattice's value space, especially when one recursive
2104 function calls another recursive one. */
2105 if (src_val->self_recursion_generated_p ())
2107 ipcp_value_source<tree> *s;
2109 /* If the lattice has already been propagated for the call site,
2110 no need to do that again. */
2111 for (s = src_val->sources; s; s = s->next)
2112 if (s->cs == cs)
2113 return dest_lat->set_contains_variable ();
2115 else
2116 val_seeds.safe_push (src_val);
2119 gcc_assert ((int) val_seeds.length () <= param_ipa_cp_value_list_size);
2121 /* Recursively generate lattice values with a limited count. */
2122 FOR_EACH_VEC_ELT (val_seeds, i, src_val)
2124 for (int j = 1; j < max_recursive_depth; j++)
2126 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2127 src_val, res_type);
2128 if (!cstval
2129 || !ipacp_value_safe_for_type (res_type, cstval))
2130 break;
2132 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2133 src_offset, &src_val, j);
2134 gcc_checking_assert (src_val);
2137 ret |= dest_lat->set_contains_variable ();
2139 else
2140 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2142 /* Here, too, we do not use self-recursively generated values as
2143 propagation sources, since otherwise the value space of a normal
2144 lattice could easily overflow. */
2145 if (src_val->self_recursion_generated_p ())
2147 ret |= dest_lat->set_contains_variable ();
2148 continue;
2151 tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2,
2152 src_val, res_type);
2153 if (cstval
2154 && ipacp_value_safe_for_type (res_type, cstval))
2155 ret |= dest_lat->add_value (cstval, cs, src_val, src_idx,
2156 src_offset);
2157 else
2158 ret |= dest_lat->set_contains_variable ();
2161 return ret;
2164 /* Propagate values through a pass-through jump function JFUNC associated with
2165 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2166 is the index of the source parameter. PARM_TYPE is the type of the
2167 parameter to which the result is passed. */
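/* For instance, a hypothetical caller

     void g (int x) { f (x + 4); }

   gives rise to a pass-through jump function with operation PLUS_EXPR
   and operand 4, so every constant known for X in G yields a
   corresponding constant for the parameter of F.  */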
2169 static bool
2170 propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc,
2171 ipcp_lattice<tree> *src_lat,
2172 ipcp_lattice<tree> *dest_lat, int src_idx,
2173 tree parm_type)
2175 return propagate_vals_across_arith_jfunc (cs,
2176 ipa_get_jf_pass_through_operation (jfunc),
2177 NULL_TREE,
2178 ipa_get_jf_pass_through_operand (jfunc),
2179 src_lat, dest_lat, -1, src_idx, parm_type);
2182 /* Propagate values through an ancestor jump function JFUNC associated with
2183 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2184 is the index of the source parameter. */
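/* A typical source of an ancestor jump function is a caller like the
   following purely illustrative one:

     struct B { int i; };
     struct D { int pad; struct B b; };
     void g (struct D *d) { f (&d->b); }

   where the jump function records the offset of member B within D so
   that values known for D can be adjusted accordingly.  */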
2186 static bool
2187 propagate_vals_across_ancestor (struct cgraph_edge *cs,
2188 struct ipa_jump_func *jfunc,
2189 ipcp_lattice<tree> *src_lat,
2190 ipcp_lattice<tree> *dest_lat, int src_idx,
2191 tree param_type)
2193 ipcp_value<tree> *src_val;
2194 bool ret = false;
2196 if (ipa_edge_within_scc (cs))
2197 return dest_lat->set_contains_variable ();
2199 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2201 tree t = ipa_get_jf_ancestor_result (jfunc, src_val->value);
2203 if (t && ipacp_value_safe_for_type (param_type, t))
2204 ret |= dest_lat->add_value (t, cs, src_val, src_idx);
2205 else
2206 ret |= dest_lat->set_contains_variable ();
2209 return ret;
2212 /* Propagate scalar values across jump function JFUNC that is associated with
2213 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2214 parameter to which the result is passed. */
2216 static bool
2217 propagate_scalar_across_jump_function (struct cgraph_edge *cs,
2218 struct ipa_jump_func *jfunc,
2219 ipcp_lattice<tree> *dest_lat,
2220 tree param_type)
2222 if (dest_lat->bottom)
2223 return false;
2225 if (jfunc->type == IPA_JF_CONST)
2227 tree val = ipa_get_jf_constant (jfunc);
2228 if (ipacp_value_safe_for_type (param_type, val))
2229 return dest_lat->add_value (val, cs, NULL, 0);
2230 else
2231 return dest_lat->set_contains_variable ();
2233 else if (jfunc->type == IPA_JF_PASS_THROUGH
2234 || jfunc->type == IPA_JF_ANCESTOR)
2236 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2237 ipcp_lattice<tree> *src_lat;
2238 int src_idx;
2239 bool ret;
2241 if (jfunc->type == IPA_JF_PASS_THROUGH)
2242 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2243 else
2244 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2246 src_lat = ipa_get_scalar_lat (caller_info, src_idx);
2247 if (src_lat->bottom)
2248 return dest_lat->set_contains_variable ();
2250 /* If we would need to clone the caller and cannot, do not propagate. */
2251 if (!ipcp_versionable_function_p (cs->caller)
2252 && (src_lat->contains_variable
2253 || (src_lat->values_count > 1)))
2254 return dest_lat->set_contains_variable ();
2256 if (jfunc->type == IPA_JF_PASS_THROUGH)
2257 ret = propagate_vals_across_pass_through (cs, jfunc, src_lat,
2258 dest_lat, src_idx,
2259 param_type);
2260 else
2261 ret = propagate_vals_across_ancestor (cs, jfunc, src_lat, dest_lat,
2262 src_idx, param_type);
2264 if (src_lat->contains_variable)
2265 ret |= dest_lat->set_contains_variable ();
2267 return ret;
2270 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2271 use them for indirect inlining); we should propagate them too. */
2272 return dest_lat->set_contains_variable ();
2275 /* Propagate polymorphic call contexts across jump function JFUNC that is
2276 associated with edge CS and describes argument IDX, and put the contexts into DEST_LAT. */
2278 static bool
2279 propagate_context_across_jump_function (cgraph_edge *cs,
2280 ipa_jump_func *jfunc, int idx,
2281 ipcp_lattice<ipa_polymorphic_call_context> *dest_lat)
2283 if (dest_lat->bottom)
2284 return false;
2285 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
2286 bool ret = false;
2287 bool added_sth = false;
2288 bool type_preserved = true;
2290 ipa_polymorphic_call_context edge_ctx, *edge_ctx_ptr
2291 = ipa_get_ith_polymorhic_call_context (args, idx);
2293 if (edge_ctx_ptr)
2294 edge_ctx = *edge_ctx_ptr;
2296 if (jfunc->type == IPA_JF_PASS_THROUGH
2297 || jfunc->type == IPA_JF_ANCESTOR)
2299 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2300 int src_idx;
2301 ipcp_lattice<ipa_polymorphic_call_context> *src_lat;
2303 /* TODO: Once we figure out how to propagate speculations, it will
2304 probably be a good idea to switch to speculation if type_preserved is
2305 not set instead of punting. */
2306 if (jfunc->type == IPA_JF_PASS_THROUGH)
2308 if (ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
2309 goto prop_fail;
2310 type_preserved = ipa_get_jf_pass_through_type_preserved (jfunc);
2311 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2313 else
2315 type_preserved = ipa_get_jf_ancestor_type_preserved (jfunc);
2316 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2319 src_lat = ipa_get_poly_ctx_lat (caller_info, src_idx);
2320 /* If we would need to clone the caller and cannot, do not propagate. */
2321 if (!ipcp_versionable_function_p (cs->caller)
2322 && (src_lat->contains_variable
2323 || (src_lat->values_count > 1)))
2324 goto prop_fail;
2326 ipcp_value<ipa_polymorphic_call_context> *src_val;
2327 for (src_val = src_lat->values; src_val; src_val = src_val->next)
2329 ipa_polymorphic_call_context cur = src_val->value;
2331 if (!type_preserved)
2332 cur.possible_dynamic_type_change (cs->in_polymorphic_cdtor);
2333 if (jfunc->type == IPA_JF_ANCESTOR)
2334 cur.offset_by (ipa_get_jf_ancestor_offset (jfunc));
2335 /* TODO: In cases we know how the context is going to be used,
2336 we can improve the result by passing proper OTR_TYPE. */
2337 cur.combine_with (edge_ctx);
2338 if (!cur.useless_p ())
2340 if (src_lat->contains_variable
2341 && !edge_ctx.equal_to (cur))
2342 ret |= dest_lat->set_contains_variable ();
2343 ret |= dest_lat->add_value (cur, cs, src_val, src_idx);
2344 added_sth = true;
2349 prop_fail:
2350 if (!added_sth)
2352 if (!edge_ctx.useless_p ())
2353 ret |= dest_lat->add_value (edge_ctx, cs);
2354 else
2355 ret |= dest_lat->set_contains_variable ();
2358 return ret;
2361 /* Propagate known bits across jump function JFUNC that is associated with
2362 edge CS and describes argument IDX, and update DEST_LATTICE accordingly. */
2364 bool
2365 propagate_bits_across_jump_function (cgraph_edge *cs, int idx,
2366 ipa_jump_func *jfunc,
2367 ipcp_bits_lattice *dest_lattice)
2369 if (dest_lattice->bottom_p ())
2370 return false;
2372 enum availability availability;
2373 cgraph_node *callee = cs->callee->function_symbol (&availability);
2374 ipa_node_params *callee_info = ipa_node_params_sum->get (callee);
2375 tree parm_type = ipa_get_type (callee_info, idx);
2377 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2378 transform for these cases. Similarly, bad type mismatches can occur
2379 with LTO, so avoid doing anything with those too. */
2380 if (!parm_type
2381 || (!INTEGRAL_TYPE_P (parm_type) && !POINTER_TYPE_P (parm_type)))
2383 if (dump_file && (dump_flags & TDF_DETAILS))
2384 fprintf (dump_file, "Setting dest_lattice to bottom, because type of "
2385 "param %i of %s is NULL or unsuitable for bits propagation\n",
2386 idx, cs->callee->dump_name ());
2388 return dest_lattice->set_to_bottom ();
2391 unsigned precision = TYPE_PRECISION (parm_type);
2392 signop sgn = TYPE_SIGN (parm_type);
2394 if (jfunc->type == IPA_JF_PASS_THROUGH
2395 || jfunc->type == IPA_JF_ANCESTOR)
2397 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2398 tree operand = NULL_TREE;
2399 enum tree_code code;
2400 unsigned src_idx;
2401 bool keep_null = false;
2403 if (jfunc->type == IPA_JF_PASS_THROUGH)
2405 code = ipa_get_jf_pass_through_operation (jfunc);
2406 src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2407 if (code != NOP_EXPR)
2408 operand = ipa_get_jf_pass_through_operand (jfunc);
2410 else
2412 code = POINTER_PLUS_EXPR;
2413 src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2414 unsigned HOST_WIDE_INT offset
2415 = ipa_get_jf_ancestor_offset (jfunc) / BITS_PER_UNIT;
2416 keep_null = (ipa_get_jf_ancestor_keep_null (jfunc) || !offset);
2417 operand = build_int_cstu (size_type_node, offset);
2420 class ipcp_param_lattices *src_lats
2421 = ipa_get_parm_lattices (caller_info, src_idx);
2423 /* Try to propagate bits even if src_lattice is bottom, as long as the
2424 jump function is known. For example, consider:
2425 int f (int x)
2427 g (x & 0xff);
2429 Assume the lattice for x is bottom; we can still propagate the
2430 known bits of x & 0xff (all bits above the low eight are zero), which
2431 get computed during the ccp1 pass and stored in the jump function
during the analysis stage. */
2433 if (!src_lats->bits_lattice.bottom_p ())
2435 bool drop_all_ones
2436 = keep_null && !src_lats->bits_lattice.known_nonzero_p ();
2438 return dest_lattice->meet_with (src_lats->bits_lattice, precision,
2439 sgn, code, operand, drop_all_ones);
2443 if (jfunc->bits)
2444 return dest_lattice->meet_with (jfunc->bits->value, jfunc->bits->mask,
2445 precision);
2446 else
2447 return dest_lattice->set_to_bottom ();
2450 /* Propagate the value range across jump function JFUNC that is associated
2451 with edge CS and a callee parameter of type PARAM_TYPE, and update
2452 DEST_PLATS accordingly. */
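/* For instance, if a caller passes x + 1 where x is known to lie in
   [0, 9], folding the binary operation over the ranges below lets us
   conclude the callee parameter lies in [1, 10] (modulo type
   conversions).  */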
2454 static bool
2455 propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
2456 class ipcp_param_lattices *dest_plats,
2457 tree param_type)
2459 ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range;
2461 if (dest_lat->bottom_p ())
2462 return false;
2464 if (!param_type
2465 || (!INTEGRAL_TYPE_P (param_type)
2466 && !POINTER_TYPE_P (param_type)))
2467 return dest_lat->set_to_bottom ();
2469 if (jfunc->type == IPA_JF_PASS_THROUGH)
2471 enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
2472 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2473 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2474 class ipcp_param_lattices *src_lats
2475 = ipa_get_parm_lattices (caller_info, src_idx);
2476 tree operand_type = ipa_get_type (caller_info, src_idx);
2478 if (src_lats->m_value_range.bottom_p ())
2479 return dest_lat->set_to_bottom ();
2481 value_range vr;
2482 if (TREE_CODE_CLASS (operation) == tcc_unary)
2483 ipa_vr_operation_and_type_effects (&vr,
2484 &src_lats->m_value_range.m_vr,
2485 operation, param_type,
2486 operand_type);
2487 /* A crude way to prevent an unbounded number of value range updates
2488 in SCC components. We should allow a limited number of updates within
2489 an SCC, too. */
2490 else if (!ipa_edge_within_scc (cs))
2492 tree op = ipa_get_jf_pass_through_operand (jfunc);
2493 value_range op_vr (op, op);
2494 value_range op_res, res;
2496 range_fold_binary_expr (&op_res, operation, operand_type,
2497 &src_lats->m_value_range.m_vr, &op_vr);
2498 ipa_vr_operation_and_type_effects (&vr,
2499 &op_res,
2500 NOP_EXPR, param_type,
2501 operand_type);
2503 if (!vr.undefined_p () && !vr.varying_p ())
2505 if (jfunc->m_vr)
2507 value_range jvr;
2508 if (ipa_vr_operation_and_type_effects (&jvr, jfunc->m_vr,
2509 NOP_EXPR,
2510 param_type,
2511 jfunc->m_vr->type ()))
2512 vr.intersect (jvr);
2514 return dest_lat->meet_with (&vr);
2517 else if (jfunc->type == IPA_JF_CONST)
2519 tree val = ipa_get_jf_constant (jfunc);
2520 if (TREE_CODE (val) == INTEGER_CST)
2522 val = fold_convert (param_type, val);
2523 if (TREE_OVERFLOW_P (val))
2524 val = drop_tree_overflow (val);
2526 value_range tmpvr (val, val);
2527 return dest_lat->meet_with (&tmpvr);
2531 value_range vr;
2532 if (jfunc->m_vr
2533 && ipa_vr_operation_and_type_effects (&vr, jfunc->m_vr, NOP_EXPR,
2534 param_type,
2535 jfunc->m_vr->type ()))
2536 return dest_lat->meet_with (&vr);
2537 else
2538 return dest_lat->set_to_bottom ();
2541 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2542 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2543 other cases, return false). If there are no aggregate items, set
2544 aggs_by_ref to NEW_AGGS_BY_REF. */
2546 static bool
2547 set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats,
2548 bool new_aggs_by_ref)
2550 if (dest_plats->aggs)
2552 if (dest_plats->aggs_by_ref != new_aggs_by_ref)
2554 set_agg_lats_to_bottom (dest_plats);
2555 return true;
2558 else
2559 dest_plats->aggs_by_ref = new_aggs_by_ref;
2560 return false;
2563 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2564 already existing lattice for the given OFFSET and SIZE, marking all skipped
2565 lattices as containing variable and checking for overlaps. If there is no
2566 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2567 it with offset, size and contains_variable set to PRE_EXISTING, and return
2568 true, unless there are too many lattices already, in which case return false.
2569 If there are overlaps, turn the whole DEST_PLATS to bottom and return false.
2570 If any skipped lattices were newly marked as containing variable, set *CHANGE
2571 to true. MAX_AGG_ITEMS is the maximum number of lattices. */
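/* A purely illustrative example, with offsets and sizes in bits: given
   existing lattices at offsets 0 and 64, each of size 32, a request for
   offset 32 with size 32 creates a new lattice between them, whereas a
   request for offset 32 with size 64 would overlap the lattice at
   offset 64 and turn the whole DEST_PLATS to bottom.  */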
2573 static bool
2574 merge_agg_lats_step (class ipcp_param_lattices *dest_plats,
2575 HOST_WIDE_INT offset, HOST_WIDE_INT val_size,
2576 struct ipcp_agg_lattice ***aglat,
2577 bool pre_existing, bool *change, int max_agg_items)
2579 gcc_checking_assert (offset >= 0);
2581 while (**aglat && (**aglat)->offset < offset)
2583 if ((**aglat)->offset + (**aglat)->size > offset)
2585 set_agg_lats_to_bottom (dest_plats);
2586 return false;
2588 *change |= (**aglat)->set_contains_variable ();
2589 *aglat = &(**aglat)->next;
2592 if (**aglat && (**aglat)->offset == offset)
2594 if ((**aglat)->size != val_size)
2596 set_agg_lats_to_bottom (dest_plats);
2597 return false;
2599 gcc_assert (!(**aglat)->next
2600 || (**aglat)->next->offset >= offset + val_size);
2601 return true;
2603 else
2605 struct ipcp_agg_lattice *new_al;
2607 if (**aglat && (**aglat)->offset < offset + val_size)
2609 set_agg_lats_to_bottom (dest_plats);
2610 return false;
2612 if (dest_plats->aggs_count == max_agg_items)
2613 return false;
2614 dest_plats->aggs_count++;
2615 new_al = ipcp_agg_lattice_pool.allocate ();
2616 memset (new_al, 0, sizeof (*new_al));
2618 new_al->offset = offset;
2619 new_al->size = val_size;
2620 new_al->contains_variable = pre_existing;
2622 new_al->next = **aglat;
2623 **aglat = new_al;
2624 return true;
2628 /* Mark AGLAT and all other aggregate lattices reachable through next pointers
2629 as containing an unknown value. */
2631 static bool
2632 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice *aglat)
2634 bool ret = false;
2635 while (aglat)
2637 ret |= aglat->set_contains_variable ();
2638 aglat = aglat->next;
2640 return ret;
2643 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2644 OFFSET_DELTA. CS is the call graph edge and SRC_IDX the index of the source
2645 parameter used for lattice value sources. Return true if DEST_PLATS changed
2646 in any way. */
2648 static bool
2649 merge_aggregate_lattices (struct cgraph_edge *cs,
2650 class ipcp_param_lattices *dest_plats,
2651 class ipcp_param_lattices *src_plats,
2652 int src_idx, HOST_WIDE_INT offset_delta)
2654 bool pre_existing = dest_plats->aggs != NULL;
2655 struct ipcp_agg_lattice **dst_aglat;
2656 bool ret = false;
2658 if (set_check_aggs_by_ref (dest_plats, src_plats->aggs_by_ref))
2659 return true;
2660 if (src_plats->aggs_bottom)
2661 return set_agg_lats_contain_variable (dest_plats);
2662 if (src_plats->aggs_contain_variable)
2663 ret |= set_agg_lats_contain_variable (dest_plats);
2664 dst_aglat = &dest_plats->aggs;
2666 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2667 param_ipa_max_agg_items);
2668 for (struct ipcp_agg_lattice *src_aglat = src_plats->aggs;
2669 src_aglat;
2670 src_aglat = src_aglat->next)
2672 HOST_WIDE_INT new_offset = src_aglat->offset - offset_delta;
2674 if (new_offset < 0)
2675 continue;
2676 if (merge_agg_lats_step (dest_plats, new_offset, src_aglat->size,
2677 &dst_aglat, pre_existing, &ret, max_agg_items))
2679 struct ipcp_agg_lattice *new_al = *dst_aglat;
2681 dst_aglat = &(*dst_aglat)->next;
2682 if (src_aglat->bottom)
2684 ret |= new_al->set_contains_variable ();
2685 continue;
2687 if (src_aglat->contains_variable)
2688 ret |= new_al->set_contains_variable ();
2689 for (ipcp_value<tree> *val = src_aglat->values;
2690 val;
2691 val = val->next)
2692 ret |= new_al->add_value (val->value, cs, val, src_idx,
2693 src_aglat->offset);
2695 else if (dest_plats->aggs_bottom)
2696 return true;
2698 ret |= set_chain_of_aglats_contains_variable (*dst_aglat);
2699 return ret;
2702 /* Determine whether there is anything to propagate from SRC_PLATS through a
2703 pass-through JFUNC and if so, whether it conforms to the rules about
2704 propagating values passed by reference. */
2706 static bool
2707 agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats,
2708 struct ipa_jump_func *jfunc)
2710 return src_plats->aggs
2711 && (!src_plats->aggs_by_ref
2712 || ipa_get_jf_pass_through_agg_preserved (jfunc));
2715 /* Propagate values through ITEM, jump function for a part of an aggregate,
2716 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2717 associated with the jump function. Return true if AGLAT changed in any
2718 way. */
2720 static bool
2721 propagate_aggregate_lattice (struct cgraph_edge *cs,
2722 struct ipa_agg_jf_item *item,
2723 struct ipcp_agg_lattice *aglat)
2725 class ipa_node_params *caller_info;
2726 class ipcp_param_lattices *src_plats;
2727 struct ipcp_lattice<tree> *src_lat;
2728 HOST_WIDE_INT src_offset;
2729 int src_idx;
2730 tree load_type;
2731 bool ret;
2733 if (item->jftype == IPA_JF_CONST)
2735 tree value = item->value.constant;
2737 gcc_checking_assert (is_gimple_ip_invariant (value));
2738 return aglat->add_value (value, cs, NULL, 0);
2741 gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
2742 || item->jftype == IPA_JF_LOAD_AGG);
2744 caller_info = ipa_node_params_sum->get (cs->caller);
2745 src_idx = item->value.pass_through.formal_id;
2746 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2748 if (item->jftype == IPA_JF_PASS_THROUGH)
2750 load_type = NULL_TREE;
2751 src_lat = &src_plats->itself;
2752 src_offset = -1;
2754 else
2756 HOST_WIDE_INT load_offset = item->value.load_agg.offset;
2757 struct ipcp_agg_lattice *src_aglat;
2759 for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next)
2760 if (src_aglat->offset >= load_offset)
2761 break;
2763 load_type = item->value.load_agg.type;
2764 if (!src_aglat
2765 || src_aglat->offset > load_offset
2766 || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type))
2767 || src_plats->aggs_by_ref != item->value.load_agg.by_ref)
2768 return aglat->set_contains_variable ();
2770 src_lat = src_aglat;
2771 src_offset = load_offset;
2774 if (src_lat->bottom
2775 || (!ipcp_versionable_function_p (cs->caller)
2776 && !src_lat->is_single_const ()))
2777 return aglat->set_contains_variable ();
2779 ret = propagate_vals_across_arith_jfunc (cs,
2780 item->value.pass_through.operation,
2781 load_type,
2782 item->value.pass_through.operand,
2783 src_lat, aglat,
2784 src_offset,
2785 src_idx,
2786 item->type);
2788 if (src_lat->contains_variable)
2789 ret |= aglat->set_contains_variable ();
2791 return ret;
2794 /* Propagate aggregate values across jump function JFUNC that is associated
2795 with edge CS and put the values into DEST_PLATS. */
2797 static bool
2798 propagate_aggs_across_jump_function (struct cgraph_edge *cs,
2799 struct ipa_jump_func *jfunc,
2800 class ipcp_param_lattices *dest_plats)
2802 bool ret = false;
2804 if (dest_plats->aggs_bottom)
2805 return false;
2807 if (jfunc->type == IPA_JF_PASS_THROUGH
2808 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
2810 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2811 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
2812 class ipcp_param_lattices *src_plats;
2814 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2815 if (agg_pass_through_permissible_p (src_plats, jfunc))
2817 /* Currently we do not produce clobber aggregate jump
2818 functions, replace with merging when we do. */
2819 gcc_assert (!jfunc->agg.items);
2820 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats,
2821 src_idx, 0);
2822 return ret;
2825 else if (jfunc->type == IPA_JF_ANCESTOR
2826 && ipa_get_jf_ancestor_agg_preserved (jfunc))
2828 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
2829 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
2830 class ipcp_param_lattices *src_plats;
2832 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
2833 if (src_plats->aggs && src_plats->aggs_by_ref)
2835 /* Currently we do not produce clobber aggregate jump
2836 functions, replace with merging when we do. */
2837 gcc_assert (!jfunc->agg.items);
2838 ret |= merge_aggregate_lattices (cs, dest_plats, src_plats, src_idx,
2839 ipa_get_jf_ancestor_offset (jfunc));
2841 else if (!src_plats->aggs_by_ref)
2842 ret |= set_agg_lats_to_bottom (dest_plats);
2843 else
2844 ret |= set_agg_lats_contain_variable (dest_plats);
2845 return ret;
2848 if (jfunc->agg.items)
2850 bool pre_existing = dest_plats->aggs != NULL;
2851 struct ipcp_agg_lattice **aglat = &dest_plats->aggs;
2852 struct ipa_agg_jf_item *item;
2853 int i;
2855 if (set_check_aggs_by_ref (dest_plats, jfunc->agg.by_ref))
2856 return true;
2858 int max_agg_items = opt_for_fn (cs->callee->function_symbol ()->decl,
2859 param_ipa_max_agg_items);
2860 FOR_EACH_VEC_ELT (*jfunc->agg.items, i, item)
2862 HOST_WIDE_INT val_size;
2864 if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
2865 continue;
2866 val_size = tree_to_shwi (TYPE_SIZE (item->type));
2868 if (merge_agg_lats_step (dest_plats, item->offset, val_size,
2869 &aglat, pre_existing, &ret, max_agg_items))
2871 ret |= propagate_aggregate_lattice (cs, item, *aglat);
2872 aglat = &(*aglat)->next;
2874 else if (dest_plats->aggs_bottom)
2875 return true;
2878 ret |= set_chain_of_aglats_contains_variable (*aglat);
2880 else
2881 ret |= set_agg_lats_contain_variable (dest_plats);
2883 return ret;
2886 /* Return true if on the way from CS->caller to the final (non-alias and
2887 non-thunk) destination, the call passes through a thunk. */
2889 static bool
2890 call_passes_through_thunk (cgraph_edge *cs)
2892 cgraph_node *alias_or_thunk = cs->callee;
2893 while (alias_or_thunk->alias)
2894 alias_or_thunk = alias_or_thunk->get_alias_target ();
2895 return alias_or_thunk->thunk;
2898 /* Propagate constants from the caller to the callee of CS. Return true if
2899 any lattice of the callee changed. */
2901 static bool
2902 propagate_constants_across_call (struct cgraph_edge *cs)
2904 class ipa_node_params *callee_info;
2905 enum availability availability;
2906 cgraph_node *callee;
2907 class ipa_edge_args *args;
2908 bool ret = false;
2909 int i, args_count, parms_count;
2911 callee = cs->callee->function_symbol (&availability);
2912 if (!callee->definition)
2913 return false;
2914 gcc_checking_assert (callee->has_gimple_body_p ());
2915 callee_info = ipa_node_params_sum->get (callee);
2916 if (!callee_info)
2917 return false;
2919 args = ipa_edge_args_sum->get (cs);
2920 parms_count = ipa_get_param_count (callee_info);
2921 if (parms_count == 0)
2922 return false;
2923 if (!args
2924 || !opt_for_fn (cs->caller->decl, flag_ipa_cp)
2925 || !opt_for_fn (cs->caller->decl, optimize))
2927 for (i = 0; i < parms_count; i++)
2928 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2929 i));
2930 return ret;
2932 args_count = ipa_get_cs_argument_count (args);
2934 /* If this call goes through a thunk we must not propagate to the first (0th)
2935 parameter. However, we might need to uncover a thunk from below a series
2936 of aliases first. */
2937 if (call_passes_through_thunk (cs))
2939 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info,
2940 0));
2941 i = 1;
2943 else
2944 i = 0;
2946 for (; (i < args_count) && (i < parms_count); i++)
2948 struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i);
2949 class ipcp_param_lattices *dest_plats;
2950 tree param_type = ipa_get_type (callee_info, i);
2952 dest_plats = ipa_get_parm_lattices (callee_info, i);
2953 if (availability == AVAIL_INTERPOSABLE)
2954 ret |= set_all_contains_variable (dest_plats);
2955 else
2957 ret |= propagate_scalar_across_jump_function (cs, jump_func,
2958 &dest_plats->itself,
2959 param_type);
2960 ret |= propagate_context_across_jump_function (cs, jump_func, i,
2961 &dest_plats->ctxlat);
2963 ret |= propagate_bits_across_jump_function (cs, i, jump_func,
2964 &dest_plats->bits_lattice);
2965 ret |= propagate_aggs_across_jump_function (cs, jump_func,
2966 dest_plats);
2967 if (opt_for_fn (callee->decl, flag_ipa_vrp))
2968 ret |= propagate_vr_across_jump_function (cs, jump_func,
2969 dest_plats, param_type);
2970 else
2971 ret |= dest_plats->m_value_range.set_to_bottom ();
2974 for (; i < parms_count; i++)
2975 ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, i));
2977 return ret;
2980 /* If an indirect edge IE can be turned into a direct one based on KNOWN_CSTS,
2981 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS, return the destination. The latter
2982 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. */
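/* For example, if the relevant argument is known to be the constant
   &some_fn (a hypothetical function), the indirect call can be turned
   into a direct call to some_fn; for polymorphic calls, the target is
   instead looked up in the vtable identified by a known vtable pointer
   or polymorphic context.  */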
2984 static tree
2985 ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie,
2986 const vec<tree> &known_csts,
2987 const vec<ipa_polymorphic_call_context> &known_contexts,
2988 const vec<ipa_agg_value_set> &known_aggs,
2989 struct ipa_agg_replacement_value *agg_reps,
2990 bool *speculative)
2992 int param_index = ie->indirect_info->param_index;
2993 HOST_WIDE_INT anc_offset;
2994 tree t = NULL;
2995 tree target = NULL;
2997 *speculative = false;
2999 if (param_index == -1)
3000 return NULL_TREE;
3002 if (!ie->indirect_info->polymorphic)
3004 tree t = NULL;
3006 if (ie->indirect_info->agg_contents)
3008 t = NULL;
3009 if (agg_reps && ie->indirect_info->guaranteed_unmodified)
3011 while (agg_reps)
3013 if (agg_reps->index == param_index
3014 && agg_reps->offset == ie->indirect_info->offset
3015 && agg_reps->by_ref == ie->indirect_info->by_ref)
3017 t = agg_reps->value;
3018 break;
3020 agg_reps = agg_reps->next;
3023 if (!t)
3025 const ipa_agg_value_set *agg;
3026 if (known_aggs.length () > (unsigned int) param_index)
3027 agg = &known_aggs[param_index];
3028 else
3029 agg = NULL;
3030 bool from_global_constant;
3031 t = ipa_find_agg_cst_for_param (agg,
3032 (unsigned) param_index
3033 < known_csts.length ()
3034 ? known_csts[param_index]
3035 : NULL,
3036 ie->indirect_info->offset,
3037 ie->indirect_info->by_ref,
3038 &from_global_constant);
3039 if (t
3040 && !from_global_constant
3041 && !ie->indirect_info->guaranteed_unmodified)
3042 t = NULL_TREE;
3045 else if ((unsigned) param_index < known_csts.length ())
3046 t = known_csts[param_index];
3048 if (t
3049 && TREE_CODE (t) == ADDR_EXPR
3050 && TREE_CODE (TREE_OPERAND (t, 0)) == FUNCTION_DECL)
3051 return TREE_OPERAND (t, 0);
3052 else
3053 return NULL_TREE;
3056 if (!opt_for_fn (ie->caller->decl, flag_devirtualize))
3057 return NULL_TREE;
3059 gcc_assert (!ie->indirect_info->agg_contents);
3060 anc_offset = ie->indirect_info->offset;
3062 t = NULL;
3064 /* Try to work out the value of the virtual table pointer in replacements. */
3065 if (!t && agg_reps && !ie->indirect_info->by_ref)
3067 while (agg_reps)
3069 if (agg_reps->index == param_index
3070 && agg_reps->offset == ie->indirect_info->offset
3071 && agg_reps->by_ref)
3073 t = agg_reps->value;
3074 break;
3076 agg_reps = agg_reps->next;
3080 /* Try to work out the value of the virtual table pointer in known
3081 aggregate values. */
3082 if (!t && known_aggs.length () > (unsigned int) param_index
3083 && !ie->indirect_info->by_ref)
3085 const ipa_agg_value_set *agg = &known_aggs[param_index];
3086 t = ipa_find_agg_cst_for_param (agg,
3087 (unsigned) param_index
3088 < known_csts.length ()
3089 ? known_csts[param_index] : NULL,
3090 ie->indirect_info->offset, true);
3093 /* If we found the virtual table pointer, lookup the target. */
3094 if (t)
3096 tree vtable;
3097 unsigned HOST_WIDE_INT offset;
3098 if (vtable_pointer_value_to_vtable (t, &vtable, &offset))
3100 bool can_refer;
3101 target = gimple_get_virt_method_for_vtable (ie->indirect_info->otr_token,
3102 vtable, offset, &can_refer);
3103 if (can_refer)
3105 if (!target
3106 || fndecl_built_in_p (target, BUILT_IN_UNREACHABLE)
3107 || !possible_polymorphic_call_target_p
3108 (ie, cgraph_node::get (target)))
3110 /* Do not speculate builtin_unreachable, it is stupid! */
3111 if (ie->indirect_info->vptr_changed)
3112 return NULL;
3113 target = ipa_impossible_devirt_target (ie, target);
3115 *speculative = ie->indirect_info->vptr_changed;
3116 if (!*speculative)
3117 return target;
3122 /* Do we know the constant value of pointer? */
3123 if (!t && (unsigned) param_index < known_csts.length ())
3124 t = known_csts[param_index];
3126 gcc_checking_assert (!t || TREE_CODE (t) != TREE_BINFO);
3128 ipa_polymorphic_call_context context;
3129 if (known_contexts.length () > (unsigned int) param_index)
3131 context = known_contexts[param_index];
3132 context.offset_by (anc_offset);
3133 if (ie->indirect_info->vptr_changed)
3134 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3135 ie->indirect_info->otr_type);
3136 if (t)
3138 ipa_polymorphic_call_context ctx2 = ipa_polymorphic_call_context
3139 (t, ie->indirect_info->otr_type, anc_offset);
3140 if (!ctx2.useless_p ())
3141 context.combine_with (ctx2, ie->indirect_info->otr_type);
3144 else if (t)
3146 context = ipa_polymorphic_call_context (t, ie->indirect_info->otr_type,
3147 anc_offset);
3148 if (ie->indirect_info->vptr_changed)
3149 context.possible_dynamic_type_change (ie->in_polymorphic_cdtor,
3150 ie->indirect_info->otr_type);
3152 else
3153 return NULL_TREE;
3155 vec <cgraph_node *>targets;
3156 bool final;
3158 targets = possible_polymorphic_call_targets
3159 (ie->indirect_info->otr_type,
3160 ie->indirect_info->otr_token,
3161 context, &final);
3162 if (!final || targets.length () > 1)
3164 struct cgraph_node *node;
3165 if (*speculative)
3166 return target;
3167 if (!opt_for_fn (ie->caller->decl, flag_devirtualize_speculatively)
3168 || ie->speculative || !ie->maybe_hot_p ())
3169 return NULL;
3170 node = try_speculative_devirtualization (ie->indirect_info->otr_type,
3171 ie->indirect_info->otr_token,
3172 context);
3173 if (node)
3175 *speculative = true;
3176 target = node->decl;
3178 else
3179 return NULL;
3181 else
3183 *speculative = false;
3184 if (targets.length () == 1)
3185 target = targets[0]->decl;
3186 else
3187 target = ipa_impossible_devirt_target (ie, NULL_TREE);
3190 if (target && !possible_polymorphic_call_target_p (ie,
3191 cgraph_node::get (target)))
3193 if (*speculative)
3194 return NULL;
3195 target = ipa_impossible_devirt_target (ie, target);
3198 return target;
3201 /* If an indirect edge IE can be turned into a direct one based on data in
3202 AVALS, return the destination. Store into *SPECULATIVE a boolean determining
3203 whether the discovered target is only a speculative guess. */
3205 tree
3206 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3207 ipa_call_arg_values *avals,
3208 bool *speculative)
3210 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3211 avals->m_known_contexts,
3212 avals->m_known_aggs,
3213 NULL, speculative);
3216 /* The same functionality as above overloaded for ipa_auto_call_arg_values. */
3218 tree
3219 ipa_get_indirect_edge_target (struct cgraph_edge *ie,
3220 ipa_auto_call_arg_values *avals,
3221 bool *speculative)
3223 return ipa_get_indirect_edge_target_1 (ie, avals->m_known_vals,
3224 avals->m_known_contexts,
3225 avals->m_known_aggs,
3226 NULL, speculative);
3229 /* Calculate devirtualization time bonus for NODE, assuming we know information
3230 about arguments stored in AVALS. */
3232 static int
3233 devirtualization_time_bonus (struct cgraph_node *node,
3234 ipa_auto_call_arg_values *avals)
3236 struct cgraph_edge *ie;
3237 int res = 0;
3239 for (ie = node->indirect_calls; ie; ie = ie->next_callee)
3241 struct cgraph_node *callee;
3242 class ipa_fn_summary *isummary;
3243 enum availability avail;
3244 tree target;
3245 bool speculative;
3247 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3248 if (!target)
3249 continue;
3251 /* Only bare minimum benefit for clearly un-inlineable targets. */
3252 res += 1;
3253 callee = cgraph_node::get (target);
3254 if (!callee || !callee->definition)
3255 continue;
3256 callee = callee->function_symbol (&avail);
3257 if (avail < AVAIL_AVAILABLE)
3258 continue;
3259 isummary = ipa_fn_summaries->get (callee);
3260 if (!isummary || !isummary->inlinable)
3261 continue;
3263 int size = ipa_size_summaries->get (callee)->size;
3264 /* FIXME: The values below need reconsidering and perhaps also
3265 integrating into the cost metrics, at least in some very basic way. */
3266 int max_inline_insns_auto
3267 = opt_for_fn (callee->decl, param_max_inline_insns_auto);
3268 if (size <= max_inline_insns_auto / 4)
3269 res += 31 / ((int)speculative + 1);
3270 else if (size <= max_inline_insns_auto / 2)
3271 res += 15 / ((int)speculative + 1);
3272 else if (size <= max_inline_insns_auto
3273 || DECL_DECLARED_INLINE_P (callee->decl))
3274 res += 7 / ((int)speculative + 1);
3277 return res;
3280 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3282 static int
3283 hint_time_bonus (cgraph_node *node, const ipa_call_estimates &estimates)
3285 int result = 0;
3286 ipa_hints hints = estimates.hints;
3287 if (hints & (INLINE_HINT_loop_iterations | INLINE_HINT_loop_stride))
3288 result += opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3290 sreal bonus_for_one = opt_for_fn (node->decl, param_ipa_cp_loop_hint_bonus);
3292 if (hints & INLINE_HINT_loop_iterations)
3293 result += (estimates.loops_with_known_iterations * bonus_for_one).to_int ();
3295 if (hints & INLINE_HINT_loop_stride)
3296 result += (estimates.loops_with_known_strides * bonus_for_one).to_int ();
3298 return result;
3301 /* If there is a reason to penalize the function described by INFO in the
3302 cloning goodness evaluation, apply it to EVALUATION and return the result. */
3304 static inline sreal
3305 incorporate_penalties (cgraph_node *node, ipa_node_params *info,
3306 sreal evaluation)
3308 if (info->node_within_scc && !info->node_is_self_scc)
3309 evaluation = (evaluation
3310 * (100 - opt_for_fn (node->decl,
3311 param_ipa_cp_recursion_penalty))) / 100;
3313 if (info->node_calling_single_call)
3314 evaluation = (evaluation
3315 * (100 - opt_for_fn (node->decl,
3316 param_ipa_cp_single_call_penalty)))
3317 / 100;
3319 return evaluation;
3322 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3323 and SIZE_COST and with the sums of frequencies and profile counts of incoming
3324 edges to the potential new clone in FREQ_SUM and COUNT_SUM. */
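/* An illustrative calculation with made-up numbers: with TIME_BENEFIT
   1, FREQ_SUM 2, SIZE_COST 4, no profile counts and no penalties, the
   code below computes 1 * 2 / 4 * 1000 = 500, which is then compared
   against param_ipa_cp_eval_threshold.  */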
3326 static bool
3327 good_cloning_opportunity_p (struct cgraph_node *node, sreal time_benefit,
3328 sreal freq_sum, profile_count count_sum,
3329 int size_cost)
3331 if (time_benefit == 0
3332 || !opt_for_fn (node->decl, flag_ipa_cp_clone)
3333 || node->optimize_for_size_p ())
3334 return false;
3336 gcc_assert (size_cost > 0);
3338 ipa_node_params *info = ipa_node_params_sum->get (node);
3339 int eval_threshold = opt_for_fn (node->decl, param_ipa_cp_eval_threshold);
3340 if (count_sum > profile_count::zero ())
3342 gcc_assert (base_count > profile_count::zero ());
3343 sreal factor = count_sum.probability_in (base_count).to_sreal ();
3344 sreal evaluation = (time_benefit * factor) / size_cost;
3345 evaluation = incorporate_penalties (node, info, evaluation);
3346 evaluation *= 1000;
3348 if (dump_file && (dump_flags & TDF_DETAILS))
3350 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3351 "size: %i, count_sum: ", time_benefit.to_double (),
3352 size_cost);
3353 count_sum.dump (dump_file);
3354 fprintf (dump_file, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3355 info->node_within_scc
3356 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3357 info->node_calling_single_call ? ", single_call" : "",
3358 evaluation.to_double (), eval_threshold);
3361 return evaluation.to_int () >= eval_threshold;
3363 else
3365 sreal evaluation = (time_benefit * freq_sum) / size_cost;
3366 evaluation = incorporate_penalties (node, info, evaluation);
3367 evaluation *= 1000;
3369 if (dump_file && (dump_flags & TDF_DETAILS))
3370 fprintf (dump_file, " good_cloning_opportunity_p (time: %g, "
3371 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3372 "threshold: %i\n",
3373 time_benefit.to_double (), size_cost, freq_sum.to_double (),
3374 info->node_within_scc
3375 ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "",
3376 info->node_calling_single_call ? ", single_call" : "",
3377 evaluation.to_double (), eval_threshold);
3379 return evaluation.to_int () >= eval_threshold;
3383 /* Return all context independent values from aggregate lattices in PLATS in a
3384 vector. Return an empty vector if there are none. */
3386 static vec<ipa_agg_value>
3387 context_independent_aggregate_values (class ipcp_param_lattices *plats)
3389 vec<ipa_agg_value> res = vNULL;
3391 if (plats->aggs_bottom
3392 || plats->aggs_contain_variable
3393 || plats->aggs_count == 0)
3394 return vNULL;
3396 for (struct ipcp_agg_lattice *aglat = plats->aggs;
3397 aglat;
3398 aglat = aglat->next)
3399 if (aglat->is_single_const ())
3401 struct ipa_agg_value item;
3402 item.offset = aglat->offset;
3403 item.value = aglat->values->value;
3404 res.safe_push (item);
3406 return res;
3409 /* Grow vectors in AVALS and fill them with information about values of
3410 parameters that are known to be independent of the context. Only calculate
3411 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3412 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3413 parameters will be stored in it.
3415 TODO: Also grow context independent value range vectors. */
3417 static bool
3418 gather_context_independent_values (class ipa_node_params *info,
3419 ipa_auto_call_arg_values *avals,
3420 bool calculate_aggs,
3421 int *removable_params_cost)
3423 int i, count = ipa_get_param_count (info);
3424 bool ret = false;
3426 avals->m_known_vals.safe_grow_cleared (count, true);
3427 avals->m_known_contexts.safe_grow_cleared (count, true);
3428 if (calculate_aggs)
3429 avals->m_known_aggs.safe_grow_cleared (count, true);
3431 if (removable_params_cost)
3432 *removable_params_cost = 0;
3434 for (i = 0; i < count; i++)
3436 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3437 ipcp_lattice<tree> *lat = &plats->itself;
3439 if (lat->is_single_const ())
3441 ipcp_value<tree> *val = lat->values;
3442 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3443 avals->m_known_vals[i] = val->value;
3444 if (removable_params_cost)
3445 *removable_params_cost
3446 += estimate_move_cost (TREE_TYPE (val->value), false);
3447 ret = true;
3449 else if (removable_params_cost
3450 && !ipa_is_param_used (info, i))
3451 *removable_params_cost
3452 += ipa_get_param_move_cost (info, i);
3454 if (!ipa_is_param_used (info, i))
3455 continue;
3457 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3458 /* Do not count a known context as a reason for cloning on its own; we
3459 only check whether it permits devirtualization. */
3460 if (ctxlat->is_single_const ())
3461 avals->m_known_contexts[i] = ctxlat->values->value;
3463 if (calculate_aggs)
3465 vec<ipa_agg_value> agg_items;
3466 struct ipa_agg_value_set *agg;
3468 agg_items = context_independent_aggregate_values (plats);
3469 agg = &avals->m_known_aggs[i];
3470 agg->items = agg_items;
3471 agg->by_ref = plats->aggs_by_ref;
3472 ret |= !agg_items.is_empty ();
3476 return ret;
3479 /* Perform time and size measurement of NODE with the context given in AVALS,
3480 calculate the benefit compared to the node without specialization and store
3481 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3482 context-independent or unused removable parameters and EST_MOVE_COST, the
3483 estimated move cost of the considered parameter. */
3485 static void
3486 perform_estimation_of_a_value (cgraph_node *node,
3487 ipa_auto_call_arg_values *avals,
3488 int removable_params_cost, int est_move_cost,
3489 ipcp_value_base *val)
3491 sreal time_benefit;
3492 ipa_call_estimates estimates;
3494 estimate_ipcp_clone_size_and_time (node, avals, &estimates);
3496 /* Extern inline functions have no cloning local time benefits because they
3497 will be inlined anyway. The only reason to clone them is if it enables
3498 optimization in any of the functions they call. */
3499 if (DECL_EXTERNAL (node->decl) && DECL_DECLARED_INLINE_P (node->decl))
3500 time_benefit = 0;
3501 else
3502 time_benefit = (estimates.nonspecialized_time - estimates.time)
3503 + (devirtualization_time_bonus (node, avals)
3504 + hint_time_bonus (node, estimates)
3505 + removable_params_cost + est_move_cost);
3507 int size = estimates.size;
3508 gcc_checking_assert (size >= 0);
3509 /* The inliner-heuristics based estimates may think that in certain
3510 contexts some functions do not have any size at all, but we want
3511 all specializations to have at least a tiny cost, not least so that
3512 we never divide by zero. */
3513 if (size == 0)
3514 size = 1;
3516 val->local_time_benefit = time_benefit;
3517 val->local_size_cost = size;
3520 /* Get the overall limit of growth based on growth-related parameters of NODE.
3521 It does not really make sense to mix functions with different overall growth
3522 limits, but it is possible, and if it happens, we do not want to select one
3523 limit at random. */
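/* With made-up numbers: for orig_overall_size 10000, a smaller
   param_ipa_cp_large_unit_insns and param_ipa_cp_unit_growth of 10,
   the limit comes out as 10000 + 10000 * 10 / 100 + 1 = 11001.  */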
3525 static long
3526 get_max_overall_size (cgraph_node *node)
3528 long max_new_size = orig_overall_size;
3529 long large_unit = opt_for_fn (node->decl, param_ipa_cp_large_unit_insns);
3530 if (max_new_size < large_unit)
3531 max_new_size = large_unit;
3532 int unit_growth = opt_for_fn (node->decl, param_ipa_cp_unit_growth);
3533 max_new_size += max_new_size * unit_growth / 100 + 1;
3534 return max_new_size;
3537 /* Iterate over known values of parameters of NODE and estimate the local
3538 effects they have in terms of time and size. */
3540 static void
3541 estimate_local_effects (struct cgraph_node *node)
3543 ipa_node_params *info = ipa_node_params_sum->get (node);
3544 int i, count = ipa_get_param_count (info);
3545 bool always_const;
3546 int removable_params_cost;
3548 if (!count || !ipcp_versionable_function_p (node))
3549 return;
3551 if (dump_file && (dump_flags & TDF_DETAILS))
3552 fprintf (dump_file, "\nEstimating effects for %s.\n", node->dump_name ());
3554 ipa_auto_call_arg_values avals;
3555 always_const = gather_context_independent_values (info, &avals, true,
3556 &removable_params_cost);
3557 int devirt_bonus = devirtualization_time_bonus (node, &avals);
3558 if (always_const || devirt_bonus
3559 || (removable_params_cost && node->can_change_signature))
3561 struct caller_statistics stats;
3562 ipa_call_estimates estimates;
3564 init_caller_stats (&stats);
3565 node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
3566 false);
3567 estimate_ipcp_clone_size_and_time (node, &avals, &estimates);
3568 sreal time = estimates.nonspecialized_time - estimates.time;
3569 time += devirt_bonus;
3570 time += hint_time_bonus (node, estimates);
3571 time += removable_params_cost;
3572 int size = estimates.size - stats.n_calls * removable_params_cost;
3574 if (dump_file)
3575 fprintf (dump_file, " - context independent values, size: %i, "
3576 "time_benefit: %f\n", size, (time).to_double ());
3578 if (size <= 0 || node->local)
3580 info->do_clone_for_all_contexts = true;
3582 if (dump_file)
3583 fprintf (dump_file, " Decided to specialize for all "
3584 "known contexts, code not going to grow.\n");
3586 else if (good_cloning_opportunity_p (node, time, stats.freq_sum,
3587 stats.count_sum, size))
3589 if (size + overall_size <= get_max_overall_size (node))
3591 info->do_clone_for_all_contexts = true;
3592 overall_size += size;
3594 if (dump_file)
3595 fprintf (dump_file, " Decided to specialize for all "
3596 "known contexts, growth (to %li) deemed "
3597 "beneficial.\n", overall_size);
3599 else if (dump_file && (dump_flags & TDF_DETAILS))
3600 fprintf (dump_file, " Not cloning for all contexts because "
3601 "maximum unit size would be reached with %li.\n",
3602 size + overall_size);
3604 else if (dump_file && (dump_flags & TDF_DETAILS))
3605 fprintf (dump_file, " Not cloning for all contexts because "
3606 "!good_cloning_opportunity_p.\n");
3610 for (i = 0; i < count; i++)
3612 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3613 ipcp_lattice<tree> *lat = &plats->itself;
3614 ipcp_value<tree> *val;
3616 if (lat->bottom
3617 || !lat->values
3618 || avals.m_known_vals[i])
3619 continue;
3621 for (val = lat->values; val; val = val->next)
3623 gcc_checking_assert (TREE_CODE (val->value) != TREE_BINFO);
3624 avals.m_known_vals[i] = val->value;
3626 int emc = estimate_move_cost (TREE_TYPE (val->value), true);
3627 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3628 emc, val);
3630 if (dump_file && (dump_flags & TDF_DETAILS))
3632 fprintf (dump_file, " - estimates for value ");
3633 print_ipcp_constant_value (dump_file, val->value);
3634 fprintf (dump_file, " for ");
3635 ipa_dump_param (dump_file, info, i);
3636 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3637 val->local_time_benefit.to_double (),
3638 val->local_size_cost);
3641 avals.m_known_vals[i] = NULL_TREE;
3644 for (i = 0; i < count; i++)
3646 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3648 if (!plats->virt_call)
3649 continue;
3651 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3652 ipcp_value<ipa_polymorphic_call_context> *val;
3654 if (ctxlat->bottom
3655 || !ctxlat->values
3656 || !avals.m_known_contexts[i].useless_p ())
3657 continue;
3659 for (val = ctxlat->values; val; val = val->next)
3661 avals.m_known_contexts[i] = val->value;
3662 perform_estimation_of_a_value (node, &avals, removable_params_cost,
3663 0, val);
3665 if (dump_file && (dump_flags & TDF_DETAILS))
3667 fprintf (dump_file, " - estimates for polymorphic context ");
3668 print_ipcp_constant_value (dump_file, val->value);
3669 fprintf (dump_file, " for ");
3670 ipa_dump_param (dump_file, info, i);
3671 fprintf (dump_file, ": time_benefit: %g, size: %i\n",
3672 val->local_time_benefit.to_double (),
3673 val->local_size_cost);
3676 avals.m_known_contexts[i] = ipa_polymorphic_call_context ();
3679 for (i = 0; i < count; i++)
3681 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3683 if (plats->aggs_bottom || !plats->aggs)
3684 continue;
3686 ipa_agg_value_set *agg = &avals.m_known_aggs[i];
3687 for (ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
3689 ipcp_value<tree> *val;
3690 if (aglat->bottom || !aglat->values
3691 /* If the following is true, the one value is in known_aggs. */
3692 || (!plats->aggs_contain_variable
3693 && aglat->is_single_const ()))
3694 continue;
3696 for (val = aglat->values; val; val = val->next)
3698 struct ipa_agg_value item;
3700 item.offset = aglat->offset;
3701 item.value = val->value;
3702 agg->items.safe_push (item);
3704 perform_estimation_of_a_value (node, &avals,
3705 removable_params_cost, 0, val);
3707 if (dump_file && (dump_flags & TDF_DETAILS))
3709 fprintf (dump_file, " - estimates for value ");
3710 print_ipcp_constant_value (dump_file, val->value);
3711 fprintf (dump_file, " for ");
3712 ipa_dump_param (dump_file, info, i);
3713 fprintf (dump_file, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3714 "]: time_benefit: %g, size: %i\n",
3715 plats->aggs_by_ref ? "ref " : "",
3716 aglat->offset,
3717 val->local_time_benefit.to_double (),
3718 val->local_size_cost);
3721 agg->items.pop ();
3728 /* Add value CUR_VAL and all yet-unsorted values it depends on to the
3729 topological sort of values. */
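/* This is Tarjan's SCC algorithm applied to the graph of values linked
   through their sources: DFS numbers and low-links identify a strongly
   connected component when they meet, and each completed component is
   prepended to the VALUES_TOPO list.  */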
3731 template <typename valtype>
3732 void
3733 value_topo_info<valtype>::add_val (ipcp_value<valtype> *cur_val)
3735 ipcp_value_source<valtype> *src;
3737 if (cur_val->dfs)
3738 return;
3740 dfs_counter++;
3741 cur_val->dfs = dfs_counter;
3742 cur_val->low_link = dfs_counter;
3744 cur_val->topo_next = stack;
3745 stack = cur_val;
3746 cur_val->on_stack = true;
3748 for (src = cur_val->sources; src; src = src->next)
3749 if (src->val)
3751 if (src->val->dfs == 0)
3753 add_val (src->val);
3754 if (src->val->low_link < cur_val->low_link)
3755 cur_val->low_link = src->val->low_link;
3757 else if (src->val->on_stack
3758 && src->val->dfs < cur_val->low_link)
3759 cur_val->low_link = src->val->dfs;
3762 if (cur_val->dfs == cur_val->low_link)
3764 ipcp_value<valtype> *v, *scc_list = NULL;
3768 v = stack;
3769 stack = v->topo_next;
3770 v->on_stack = false;
3771 v->scc_no = cur_val->dfs;
3773 v->scc_next = scc_list;
3774 scc_list = v;
3776 while (v != cur_val);
3778 cur_val->topo_next = values_topo;
3779 values_topo = cur_val;
3783 /* Add all values in lattices associated with NODE to the topological sort if
3784 they are not there yet. */
3786 static void
3787 add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo)
3789 ipa_node_params *info = ipa_node_params_sum->get (node);
3790 int i, count = ipa_get_param_count (info);
3792 for (i = 0; i < count; i++)
3794 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
3795 ipcp_lattice<tree> *lat = &plats->itself;
3796 struct ipcp_agg_lattice *aglat;
3798 if (!lat->bottom)
3800 ipcp_value<tree> *val;
3801 for (val = lat->values; val; val = val->next)
3802 topo->constants.add_val (val);
3805 if (!plats->aggs_bottom)
3806 for (aglat = plats->aggs; aglat; aglat = aglat->next)
3807 if (!aglat->bottom)
3809 ipcp_value<tree> *val;
3810 for (val = aglat->values; val; val = val->next)
3811 topo->constants.add_val (val);
3814 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
3815 if (!ctxlat->bottom)
3817 ipcp_value<ipa_polymorphic_call_context> *ctxval;
3818 for (ctxval = ctxlat->values; ctxval; ctxval = ctxval->next)
3819 topo->contexts.add_val (ctxval);
3824 /* One pass of constant propagation along the call graph edges, from callers
3825 to callees (requires topological ordering in TOPO), iterating over strongly
3826 connected components. */
3828 static void
3829 propagate_constants_topo (class ipa_topo_info *topo)
3831 int i;
3833 for (i = topo->nnodes - 1; i >= 0; i--)
3835 unsigned j;
3836 struct cgraph_node *v, *node = topo->order[i];
3837 vec<cgraph_node *> cycle_nodes = ipa_get_nodes_in_cycle (node);
3839 /* First, iteratively propagate within the strongly connected component
3840 until all lattices stabilize. */
3841 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3842 if (v->has_gimple_body_p ())
3844 if (opt_for_fn (v->decl, flag_ipa_cp)
3845 && opt_for_fn (v->decl, optimize))
3846 push_node_to_stack (topo, v);
3847 /* When V is not optimized, we cannot push it to the stack, but
3848 we still need to set the lattices of all its callees to bottom. */
3849 else
3851 for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee)
3852 propagate_constants_across_call (cs);
3856 v = pop_node_from_stack (topo);
3857 while (v)
3859 struct cgraph_edge *cs;
3860 class ipa_node_params *info = NULL;
3861 bool self_scc = true;
3863 for (cs = v->callees; cs; cs = cs->next_callee)
3864 if (ipa_edge_within_scc (cs))
3866 cgraph_node *callee = cs->callee->function_symbol ();
3868 if (v != callee)
3869 self_scc = false;
3871 if (!info)
3873 info = ipa_node_params_sum->get (v);
3874 info->node_within_scc = true;
3877 if (propagate_constants_across_call (cs))
3878 push_node_to_stack (topo, callee);
3881 if (info)
3882 info->node_is_self_scc = self_scc;
3884 v = pop_node_from_stack (topo);
3887 /* Afterwards, propagate along edges leading out of the SCC, calculate
3888 the local effects of the discovered constants and add all valid values to
3889 their topological sort. */
3890 FOR_EACH_VEC_ELT (cycle_nodes, j, v)
3891 if (v->has_gimple_body_p ()
3892 && opt_for_fn (v->decl, flag_ipa_cp)
3893 && opt_for_fn (v->decl, optimize))
3895 struct cgraph_edge *cs;
3897 estimate_local_effects (v);
3898 add_all_node_vals_to_toposort (v, topo);
3899 for (cs = v->callees; cs; cs = cs->next_callee)
3900 if (!ipa_edge_within_scc (cs))
3901 propagate_constants_across_call (cs);
3903 cycle_nodes.release ();
3907 /* Propagate the estimated effects of individual values along the topological
3908 order, from the dependent values to those they depend on. */
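/* Note that values in the same SCC are treated as a unit: their local and
   already propagated time benefits and size costs are summed over the whole
   component first, and that sum is then attributed to the sources of every
   member value. */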
3910 template <typename valtype>
3911 void
3912 value_topo_info<valtype>::propagate_effects ()
3914 ipcp_value<valtype> *base;
3915 hash_set<ipcp_value<valtype> *> processed_srcvals;
3917 for (base = values_topo; base; base = base->topo_next)
3919 ipcp_value_source<valtype> *src;
3920 ipcp_value<valtype> *val;
3921 sreal time = 0;
3922 HOST_WIDE_INT size = 0;
3924 for (val = base; val; val = val->scc_next)
3926 time = time + val->local_time_benefit + val->prop_time_benefit;
3927 size = size + val->local_size_cost + val->prop_size_cost;
3930 for (val = base; val; val = val->scc_next)
3932 processed_srcvals.empty ();
3933 for (src = val->sources; src; src = src->next)
3934 if (src->val
3935 && src->cs->maybe_hot_p ())
3937 if (!processed_srcvals.add (src->val))
3939 HOST_WIDE_INT prop_size = size + src->val->prop_size_cost;
3940 if (prop_size < INT_MAX)
3941 src->val->prop_size_cost = prop_size;
3942 else
3943 continue;
3946 int special_factor = 1;
3947 if (val->same_scc (src->val))
3948 special_factor
3949 = opt_for_fn(src->cs->caller->decl,
3950 param_ipa_cp_recursive_freq_factor);
3951 else if (val->self_recursion_generated_p ()
3952 && (src->cs->callee->function_symbol ()
3953 == src->cs->caller))
3955 int max_recur_gen_depth
3956 = opt_for_fn(src->cs->caller->decl,
3957 param_ipa_cp_max_recursive_depth);
3958 special_factor = max_recur_gen_depth
3959 - val->self_recursion_generated_level + 1;
3962 src->val->prop_time_benefit
3963 += time * special_factor * src->cs->sreal_frequency ();
3966 if (size < INT_MAX)
3968 val->prop_time_benefit = time;
3969 val->prop_size_cost = size;
3971 else
3973 val->prop_time_benefit = 0;
3974 val->prop_size_cost = 0;
3980 /* Callback for qsort to sort counts of all edges in descending order. */
3982 static int
3983 compare_edge_profile_counts (const void *a, const void *b)
3985 const profile_count *cnt1 = (const profile_count *) a;
3986 const profile_count *cnt2 = (const profile_count *) b;
3988 if (*cnt1 < *cnt2)
3989 return 1;
3990 if (*cnt1 > *cnt2)
3991 return -1;
3992 return 0;
3996 /* Propagate constants, polymorphic contexts and their effects from the
3997 summaries interprocedurally. */
3999 static void
4000 ipcp_propagate_stage (class ipa_topo_info *topo)
4002 struct cgraph_node *node;
4004 if (dump_file)
4005 fprintf (dump_file, "\n Propagating constants:\n\n");
4007 base_count = profile_count::uninitialized ();
4009 bool compute_count_base = false;
4010 unsigned base_count_pos_percent = 0;
4011 FOR_EACH_DEFINED_FUNCTION (node)
4013 if (node->has_gimple_body_p ()
4014 && opt_for_fn (node->decl, flag_ipa_cp)
4015 && opt_for_fn (node->decl, optimize))
4017 ipa_node_params *info = ipa_node_params_sum->get (node);
4018 determine_versionability (node, info);
4020 unsigned nlattices = ipa_get_param_count (info);
4021 void *chunk = XCNEWVEC (class ipcp_param_lattices, nlattices);
4022 info->lattices = new (chunk) ipcp_param_lattices[nlattices];
4023 initialize_node_lattices (node);
4025 ipa_size_summary *s = ipa_size_summaries->get (node);
4026 if (node->definition && !node->alias && s != NULL)
4027 overall_size += s->self_size;
4028 if (node->count.ipa ().initialized_p ())
4030 compute_count_base = true;
4031 unsigned pos_percent = opt_for_fn (node->decl,
4032 param_ipa_cp_profile_count_base);
4033 base_count_pos_percent = MAX (base_count_pos_percent, pos_percent);
4037 if (compute_count_base)
4039 auto_vec<profile_count> all_edge_counts;
4040 all_edge_counts.reserve_exact (symtab->edges_count);
4041 FOR_EACH_DEFINED_FUNCTION (node)
4042 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4044 profile_count count = cs->count.ipa ();
4045 if (!(count > profile_count::zero ()))
4046 continue;
4048 enum availability avail;
4049 cgraph_node *tgt
4050 = cs->callee->function_or_virtual_thunk_symbol (&avail);
4051 ipa_node_params *info = ipa_node_params_sum->get (tgt);
4052 if (info && info->versionable)
4053 all_edge_counts.quick_push (count);
4056 if (!all_edge_counts.is_empty ())
4058 gcc_assert (base_count_pos_percent <= 100);
4059 all_edge_counts.qsort (compare_edge_profile_counts);
4061 unsigned base_count_pos
4062 = ((all_edge_counts.length () * (base_count_pos_percent)) / 100);
4063 base_count = all_edge_counts[base_count_pos];
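/* With the candidate counts sorted in descending order, base_count is thus
   the count found base_count_pos_percent percent of the way down the list,
   i.e. roughly that percentage of the candidate edges have a larger count. */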
4065 if (dump_file)
4067 fprintf (dump_file, "\nSelected base_count from %u edges at "
4068 "position %u, arriving at: ", all_edge_counts.length (),
4069 base_count_pos);
4070 base_count.dump (dump_file);
4071 fprintf (dump_file, "\n");
4074 else if (dump_file)
4075 fprintf (dump_file, "\nNo candidates with non-zero call count found, "
4076 "continuing as if without profile feedback.\n");
4079 orig_overall_size = overall_size;
4081 if (dump_file)
4082 fprintf (dump_file, "\noverall_size: %li\n", overall_size);
4084 propagate_constants_topo (topo);
4085 if (flag_checking)
4086 ipcp_verify_propagated_values ();
4087 topo->constants.propagate_effects ();
4088 topo->contexts.propagate_effects ();
4090 if (dump_file)
4092 fprintf (dump_file, "\nIPA lattices after all propagation:\n");
4093 print_all_lattices (dump_file, (dump_flags & TDF_DETAILS), true);
4097 /* Discover outgoing edges from NODE, which is a new clone with known
4098 constants KNOWN_CSTS, that can now be made direct, and make them so. */
4100 static void
4101 ipcp_discover_new_direct_edges (struct cgraph_node *node,
4102 vec<tree> known_csts,
4103 vec<ipa_polymorphic_call_context>
4104 known_contexts,
4105 struct ipa_agg_replacement_value *aggvals)
4107 struct cgraph_edge *ie, *next_ie;
4108 bool found = false;
4110 for (ie = node->indirect_calls; ie; ie = next_ie)
4112 tree target;
4113 bool speculative;
4115 next_ie = ie->next_callee;
4116 target = ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts,
4117 vNULL, aggvals, &speculative);
4118 if (target)
4120 bool agg_contents = ie->indirect_info->agg_contents;
4121 bool polymorphic = ie->indirect_info->polymorphic;
4122 int param_index = ie->indirect_info->param_index;
4123 struct cgraph_edge *cs = ipa_make_edge_direct_to_target (ie, target,
4124 speculative);
4125 found = true;
4127 if (cs && !agg_contents && !polymorphic)
4129 ipa_node_params *info = ipa_node_params_sum->get (node);
4130 int c = ipa_get_controlled_uses (info, param_index);
4131 if (c != IPA_UNDESCRIBED_USE
4132 && !ipa_get_param_load_dereferenced (info, param_index))
4134 struct ipa_ref *to_del;
4136 c--;
4137 ipa_set_controlled_uses (info, param_index, c);
4138 if (dump_file && (dump_flags & TDF_DETAILS))
4139 fprintf (dump_file, " controlled uses count of param "
4140 "%i bumped down to %i\n", param_index, c);
4141 if (c == 0
4142 && (to_del = node->find_reference (cs->callee, NULL, 0)))
4144 if (dump_file && (dump_flags & TDF_DETAILS))
4145 fprintf (dump_file, " and even removing its "
4146 "cloning-created reference\n");
4147 to_del->remove_reference ();
4153 /* Turning calls to direct calls will improve overall summary. */
4154 if (found)
4155 ipa_update_overall_fn_summary (node);
4158 class edge_clone_summary;
4159 static call_summary <edge_clone_summary *> *edge_clone_summaries = NULL;
4161 /* Edge clone summary. */
4163 class edge_clone_summary
4165 public:
4166 /* Default constructor. */
4167 edge_clone_summary (): prev_clone (NULL), next_clone (NULL) {}
4169 /* Default destructor. */
4170 ~edge_clone_summary ()
4172 if (prev_clone)
4173 edge_clone_summaries->get (prev_clone)->next_clone = next_clone;
4174 if (next_clone)
4175 edge_clone_summaries->get (next_clone)->prev_clone = prev_clone;
4178 cgraph_edge *prev_clone;
4179 cgraph_edge *next_clone;
4182 class edge_clone_summary_t:
4183 public call_summary <edge_clone_summary *>
4185 public:
4186 edge_clone_summary_t (symbol_table *symtab):
4187 call_summary <edge_clone_summary *> (symtab)
4189 m_initialize_when_cloning = true;
4192 virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4193 edge_clone_summary *src_data,
4194 edge_clone_summary *dst_data);
4197 /* Edge duplication hook. */
4199 void
4200 edge_clone_summary_t::duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,
4201 edge_clone_summary *src_data,
4202 edge_clone_summary *dst_data)
4204 if (src_data->next_clone)
4205 edge_clone_summaries->get (src_data->next_clone)->prev_clone = dst_edge;
4206 dst_data->prev_clone = src_edge;
4207 dst_data->next_clone = src_data->next_clone;
4208 src_data->next_clone = dst_edge;
4211 /* Return true if CS calls DEST or its clone for all contexts. When
4212 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4213 edges from/to an all-context clone. */
4215 static bool
4216 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge *cs, cgraph_node *dest,
4217 bool allow_recursion_to_clone)
4219 enum availability availability;
4220 cgraph_node *callee = cs->callee->function_symbol (&availability);
4222 if (availability <= AVAIL_INTERPOSABLE)
4223 return false;
4224 if (callee == dest)
4225 return true;
4226 if (!allow_recursion_to_clone && cs->caller == callee)
4227 return false;
4229 ipa_node_params *info = ipa_node_params_sum->get (callee);
4230 return info->is_all_contexts_clone && info->ipcp_orig_node == dest;
4233 /* Return true if edge CS does bring about the value described by SRC to
4234 DEST_VAL of node DEST or its clone for all contexts. */
4236 static bool
4237 cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source<tree> *src,
4238 cgraph_node *dest, ipcp_value<tree> *dest_val)
4240 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4242 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, !src->val)
4243 || caller_info->node_dead)
4244 return false;
4246 if (!src->val)
4247 return true;
4249 if (caller_info->ipcp_orig_node)
4251 tree t;
4252 if (src->offset == -1)
4253 t = caller_info->known_csts[src->index];
4254 else
4255 t = get_clone_agg_value (cs->caller, src->offset, src->index);
4256 return (t != NULL_TREE
4257 && values_equal_for_ipcp_p (src->val->value, t));
4259 else
4261 if (src->val == dest_val)
4262 return true;
4264 struct ipcp_agg_lattice *aglat;
4265 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4266 src->index);
4267 if (src->offset == -1)
4268 return (plats->itself.is_single_const ()
4269 && values_equal_for_ipcp_p (src->val->value,
4270 plats->itself.values->value));
4271 else
4273 if (plats->aggs_bottom || plats->aggs_contain_variable)
4274 return false;
4275 for (aglat = plats->aggs; aglat; aglat = aglat->next)
4276 if (aglat->offset == src->offset)
4277 return (aglat->is_single_const ()
4278 && values_equal_for_ipcp_p (src->val->value,
4279 aglat->values->value));
4281 return false;
4285 /* Return true if edge CS does bring about the value described by SRC to
4286 DST_VAL of node DEST or its clone for all contexts. */
4288 static bool
4289 cgraph_edge_brings_value_p (cgraph_edge *cs,
4290 ipcp_value_source<ipa_polymorphic_call_context> *src,
4291 cgraph_node *dest,
4292 ipcp_value<ipa_polymorphic_call_context> *)
4294 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
4296 if (!calls_same_node_or_its_all_contexts_clone_p (cs, dest, true)
4297 || caller_info->node_dead)
4298 return false;
4299 if (!src->val)
4300 return true;
4302 if (caller_info->ipcp_orig_node)
4303 return (caller_info->known_contexts.length () > (unsigned) src->index)
4304 && values_equal_for_ipcp_p (src->val->value,
4305 caller_info->known_contexts[src->index]);
4307 class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info,
4308 src->index);
4309 return plats->ctxlat.is_single_const ()
4310 && values_equal_for_ipcp_p (src->val->value,
4311 plats->ctxlat.values->value);
4314 /* Get the next clone in the linked list of clones of an edge. */
4316 static inline struct cgraph_edge *
4317 get_next_cgraph_edge_clone (struct cgraph_edge *cs)
4319 edge_clone_summary *s = edge_clone_summaries->get (cs);
4320 return s != NULL ? s->next_clone : NULL;
4323 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4324 of them is viable and hot, return true. In that case, for those that still
4325 hold, add their edge frequency and their number, and the cumulative profile
4326 counts of self-recursive and other edges, into *FREQ_SUM, *CALLER_COUNT,
4327 *REC_COUNT_SUM and *NONREC_COUNT_SUM respectively. */
4329 template <typename valtype>
4330 static bool
4331 get_info_about_necessary_edges (ipcp_value<valtype> *val, cgraph_node *dest,
4332 sreal *freq_sum, int *caller_count,
4333 profile_count *rec_count_sum,
4334 profile_count *nonrec_count_sum)
4336 ipcp_value_source<valtype> *src;
4337 sreal freq = 0;
4338 int count = 0;
4339 profile_count rec_cnt = profile_count::zero ();
4340 profile_count nonrec_cnt = profile_count::zero ();
4341 bool hot = false;
4342 bool non_self_recursive = false;
4344 for (src = val->sources; src; src = src->next)
4346 struct cgraph_edge *cs = src->cs;
4347 while (cs)
4349 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4351 count++;
4352 freq += cs->sreal_frequency ();
4353 hot |= cs->maybe_hot_p ();
4354 if (cs->caller != dest)
4356 non_self_recursive = true;
4357 if (cs->count.ipa ().initialized_p ())
4358 rec_cnt += cs->count.ipa ();
4360 else if (cs->count.ipa ().initialized_p ())
4361 nonrec_cnt += cs->count.ipa ();
4363 cs = get_next_cgraph_edge_clone (cs);
4367 /* If the only edges bringing a value are self-recursive ones, do not bother
4368 evaluating it. */
4369 if (!non_self_recursive)
4370 return false;
4372 *freq_sum = freq;
4373 *caller_count = count;
4374 *rec_count_sum = rec_cnt;
4375 *nonrec_count_sum = nonrec_cnt;
4377 if (!hot && ipa_node_params_sum->get (dest)->node_within_scc)
4379 struct cgraph_edge *cs;
4381 /* A cold non-SCC source edge could still trigger hot recursive execution
4382 of the function. Consider such a case as hot and rely on the following
4383 cost model computation to select the right variant. */
4384 for (cs = dest->callers; cs; cs = cs->next_caller)
4385 if (cs->caller == dest && cs->maybe_hot_p ())
4386 return true;
4389 return hot;
4392 /* Given a NODE and a set of its CALLERS, try to adjust the order of the
4393 callers so that a non-self-recursive caller becomes the first element.
4394 This simplifies intersecting operations on values that arrive from all of
4395 these callers, especially when a self-recursive call exists among them.
4396 Return true if this kind of adjustment is possible. */
4398 static bool
4399 adjust_callers_for_value_intersection (vec<cgraph_edge *> &callers,
4400 cgraph_node *node)
4402 for (unsigned i = 0; i < callers.length (); i++)
4404 cgraph_edge *cs = callers[i];
4406 if (cs->caller != node)
4408 if (i > 0)
4410 callers[i] = callers[0];
4411 callers[0] = cs;
4413 return true;
4416 return false;
4419 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4420 is assumed their number is known and equal to CALLER_COUNT. */
4422 template <typename valtype>
4423 static vec<cgraph_edge *>
4424 gather_edges_for_value (ipcp_value<valtype> *val, cgraph_node *dest,
4425 int caller_count)
4427 ipcp_value_source<valtype> *src;
4428 vec<cgraph_edge *> ret;
4430 ret.create (caller_count);
4431 for (src = val->sources; src; src = src->next)
4433 struct cgraph_edge *cs = src->cs;
4434 while (cs)
4436 if (cgraph_edge_brings_value_p (cs, src, dest, val))
4437 ret.quick_push (cs);
4438 cs = get_next_cgraph_edge_clone (cs);
4442 if (caller_count > 1)
4443 adjust_callers_for_value_intersection (ret, dest);
4445 return ret;
4448 /* Construct a replacement map for a known VALUE of formal parameter PARM_NUM.
4449 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4450 should be set to true when the reference created for the constant should be
4451 a load one and not an address one because the corresponding parameter p is
4452 only used as *p. */
4454 static struct ipa_replace_map *
4455 get_replacement_map (class ipa_node_params *info, tree value, int parm_num,
4456 bool force_load_ref)
4458 struct ipa_replace_map *replace_map;
4460 replace_map = ggc_alloc<ipa_replace_map> ();
4461 if (dump_file)
4463 fprintf (dump_file, " replacing ");
4464 ipa_dump_param (dump_file, info, parm_num);
4466 fprintf (dump_file, " with const ");
4467 print_generic_expr (dump_file, value);
4469 if (force_load_ref)
4470 fprintf (dump_file, " - forcing load reference\n");
4471 else
4472 fprintf (dump_file, "\n");
4474 replace_map->parm_num = parm_num;
4475 replace_map->new_tree = value;
4476 replace_map->force_load_ref = force_load_ref;
4477 return replace_map;
4480 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialized
4481 one, otherwise it will be referred to as the original node. */
4483 static void
4484 dump_profile_updates (cgraph_node *node, bool spec)
4486 if (spec)
4487 fprintf (dump_file, " setting count of the specialized node %s to ",
4488 node->dump_name ());
4489 else
4490 fprintf (dump_file, " setting count of the original node %s to ",
4491 node->dump_name ());
4493 node->count.dump (dump_file);
4494 fprintf (dump_file, "\n");
4495 for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee)
4497 fprintf (dump_file, " edge to %s has count ",
4498 cs->callee->dump_name ());
4499 cs->count.dump (dump_file);
4500 fprintf (dump_file, "\n");
4504 /* With a partial training run we do not want to assume that the original's
4505 count is zero whenever we redirect all executed edges to the clone; simply
4506 drop the profile to a local one in this case. In any case, return the new
4507 value. ORIG_NODE is the original node; its count has not been updated yet. */
4509 profile_count
4510 lenient_count_portion_handling (profile_count remainder, cgraph_node *orig_node)
4512 if (remainder.ipa_p () && !remainder.ipa ().nonzero_p ()
4513 && orig_node->count.ipa_p () && orig_node->count.ipa ().nonzero_p ()
4514 && opt_for_fn (orig_node->decl, flag_profile_partial_training))
4515 remainder = remainder.guessed_local ();
4517 return remainder;
4520 /* Structure to sum counts coming from nodes other than the original node and
4521 its clones. */
4523 struct gather_other_count_struct
4525 cgraph_node *orig;
4526 profile_count other_count;
4529 /* Worker callback of call_for_symbol_thunks_and_aliases summing the
4530 counts that come from non-self-recursive calls. */
4532 static bool
4533 gather_count_of_non_rec_edges (cgraph_node *node, void *data)
4535 gather_other_count_struct *desc = (gather_other_count_struct *) data;
4536 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4537 if (cs->caller != desc->orig && cs->caller->clone_of != desc->orig)
4538 desc->other_count += cs->count.ipa ();
4539 return false;
4542 /* Structure to help analyze if we need to boost counts of some clones of some
4543 non-recursive edges to match the new callee count. */
4545 struct desc_incoming_count_struct
4547 cgraph_node *orig;
4548 hash_set <cgraph_edge *> *processed_edges;
4549 profile_count count;
4550 unsigned unproc_orig_rec_edges;
4553 /* Go over edges calling NODE and its thunks and gather information about
4554 incoming counts so that we know if we need to make any adjustments. */
4556 static void
4557 analyze_clone_icoming_counts (cgraph_node *node,
4558 desc_incoming_count_struct *desc)
4560 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4561 if (cs->caller->thunk)
4563 analyze_clone_icoming_counts (cs->caller, desc);
4564 continue;
4566 else
4568 if (cs->count.initialized_p ())
4569 desc->count += cs->count.ipa ();
4570 if (!desc->processed_edges->contains (cs)
4571 && cs->caller->clone_of == desc->orig)
4572 desc->unproc_orig_rec_edges++;
4576 /* Caller edge counts of a clone created for a self-recursive arithmetic
4577 jump function may need to be adjusted because such an edge comes from the
4578 "seed" clone for the first value and so has been excessively scaled back as
4579 if it were not a recursive call. Adjust these edges so that the incoming
4580 counts of NODE match its count. NODE is the node or its thunk. */
4582 static void
4583 adjust_clone_incoming_counts (cgraph_node *node,
4584 desc_incoming_count_struct *desc)
4586 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4587 if (cs->caller->thunk)
4589 adjust_clone_incoming_counts (cs->caller, desc);
4590 profile_count sum = profile_count::zero ();
4591 for (cgraph_edge *e = cs->caller->callers; e; e = e->next_caller)
4592 if (e->count.initialized_p ())
4593 sum += e->count.ipa ();
4594 cs->count = cs->count.combine_with_ipa_count (sum);
4596 else if (!desc->processed_edges->contains (cs)
4597 && cs->caller->clone_of == desc->orig)
4599 cs->count += desc->count;
4600 if (dump_file)
4602 fprintf (dump_file, " Adjusted count of an incoming edge of "
4603 "a clone %s -> %s to ", cs->caller->dump_name (),
4604 cs->callee->dump_name ());
4605 cs->count.dump (dump_file);
4606 fprintf (dump_file, "\n");
4611 /* When ORIG_NODE has been cloned for values which have been generated for a
4612 self-recursive call as a result of arithmetic pass-through
4613 jump functions, adjust its count together with the counts of all such
4614 clones in SELF_GEN_CLONES, which at this point also contains ORIG_NODE itself.
4616 The function sums the counts of the original node and all its clones that
4617 cannot be attributed to a specific clone because they come from
4618 non-recursive edges. This sum is then evenly divided between the clones and
4619 on top of that each one gets all the counts which can be attributed directly
4620 to it. */
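/* A hypothetical example of the distribution: if ORIG_NODE's IPA count is
   360 and the series consists of three clones (including ORIG_NODE) with
   directly attributable non-recursive counts of 10, 20 and 30, the
   redistributable remainder is 300 and the clones end up with counts of
   110, 120 and 130 respectively. */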
4622 static void
4623 update_counts_for_self_gen_clones (cgraph_node *orig_node,
4624 const vec<cgraph_node *> &self_gen_clones)
4626 profile_count redist_sum = orig_node->count.ipa ();
4627 if (!(redist_sum > profile_count::zero ()))
4628 return;
4630 if (dump_file)
4631 fprintf (dump_file, " Updating profile of self recursive clone "
4632 "series\n");
4634 gather_other_count_struct gocs;
4635 gocs.orig = orig_node;
4636 gocs.other_count = profile_count::zero ();
4638 auto_vec <profile_count, 8> other_edges_count;
4639 for (cgraph_node *n : self_gen_clones)
4641 gocs.other_count = profile_count::zero ();
4642 n->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges,
4643 &gocs, false);
4644 other_edges_count.safe_push (gocs.other_count);
4645 redist_sum -= gocs.other_count;
4648 hash_set<cgraph_edge *> processed_edges;
4649 unsigned i = 0;
4650 for (cgraph_node *n : self_gen_clones)
4652 profile_count orig_count = n->count;
4653 profile_count new_count
4654 = (redist_sum.apply_scale (1, self_gen_clones.length ())
4655 + other_edges_count[i]);
4656 new_count = lenient_count_portion_handling (new_count, orig_node);
4657 n->count = new_count;
4658 profile_count::adjust_for_ipa_scaling (&new_count, &orig_count);
4659 for (cgraph_edge *cs = n->callees; cs; cs = cs->next_callee)
4661 cs->count = cs->count.apply_scale (new_count, orig_count);
4662 processed_edges.add (cs);
4664 for (cgraph_edge *cs = n->indirect_calls; cs; cs = cs->next_callee)
4665 cs->count = cs->count.apply_scale (new_count, orig_count);
4667 i++;
4670 /* There are still going to be edges to ORIG_NODE that have one or more
4671 clones coming from another node clone in SELF_GEN_CLONES and which we
4672 scaled by the same amount, which means that the total incoming sum of
4673 counts to ORIG_NODE will be too high; scale such edges back. */
4674 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4676 if (cs->callee->ultimate_alias_target () == orig_node)
4678 unsigned den = 0;
4679 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4680 if (e->callee->ultimate_alias_target () == orig_node
4681 && processed_edges.contains (e))
4682 den++;
4683 if (den > 0)
4684 for (cgraph_edge *e = cs; e; e = get_next_cgraph_edge_clone (e))
4685 if (e->callee->ultimate_alias_target () == orig_node
4686 && processed_edges.contains (e))
4687 e->count = e->count.apply_scale (1, den);
4691 /* Edges from the seeds of the values generated for arithmetic jump functions
4692 along self-recursive edges are likely to have a fairly low count, and so
4693 edges from them to nodes in self_gen_clones do not correspond to the
4694 artificially distributed count of the nodes; the total sum of incoming
4695 edges to some clones might therefore be too low. Detect this situation and
4696 correct it. */
4697 for (cgraph_node *n : self_gen_clones)
4699 if (!(n->count.ipa () > profile_count::zero ()))
4700 continue;
4702 desc_incoming_count_struct desc;
4703 desc.orig = orig_node;
4704 desc.processed_edges = &processed_edges;
4705 desc.count = profile_count::zero ();
4706 desc.unproc_orig_rec_edges = 0;
4707 analyze_clone_icoming_counts (n, &desc);
4709 if (n->count.differs_from_p (desc.count))
4711 if (n->count > desc.count
4712 && desc.unproc_orig_rec_edges > 0)
4714 desc.count = n->count - desc.count;
4715 desc.count
4716 = desc.count.apply_scale (1, desc.unproc_orig_rec_edges);
4717 adjust_clone_incoming_counts (n, &desc);
4719 else if (dump_file)
4720 fprintf (dump_file,
4721 " Unable to fix up incoming counts for %s.\n",
4722 n->dump_name ());
4726 if (dump_file)
4727 for (cgraph_node *n : self_gen_clones)
4728 dump_profile_updates (n, n != orig_node);
4729 return;
4732 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4733 their profile information to reflect this. This function should not be used
4734 for clones generated for arithmetic pass-through jump functions on a
4735 self-recursive call graph edge; that situation is handled by
4736 update_counts_for_self_gen_clones. */
4738 static void
4739 update_profiling_info (struct cgraph_node *orig_node,
4740 struct cgraph_node *new_node)
4742 struct caller_statistics stats;
4743 profile_count new_sum;
4744 profile_count remainder, orig_node_count = orig_node->count.ipa ();
4746 if (!(orig_node_count > profile_count::zero ()))
4747 return;
4749 if (dump_file)
4751 fprintf (dump_file, " Updating profile from original count: ");
4752 orig_node_count.dump (dump_file);
4753 fprintf (dump_file, "\n");
4756 init_caller_stats (&stats, new_node);
4757 new_node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats,
4758 false);
4759 new_sum = stats.count_sum;
4761 if (new_sum > orig_node_count)
4763 /* TODO: Perhaps this should be gcc_unreachable ()? */
4764 remainder = profile_count::zero ().guessed_local ();
4766 else if (stats.rec_count_sum.nonzero_p ())
4768 int new_nonrec_calls = stats.n_nonrec_calls;
4769 /* There are self-recursive edges which are likely to bring in the
4770 majority of calls but which we must divide between the original and the
4771 new node. */
4772 init_caller_stats (&stats, orig_node);
4773 orig_node->call_for_symbol_thunks_and_aliases (gather_caller_stats,
4774 &stats, false);
4775 int orig_nonrec_calls = stats.n_nonrec_calls;
4776 profile_count orig_nonrec_call_count = stats.count_sum;
4778 if (orig_node->local)
4780 if (!orig_nonrec_call_count.nonzero_p ())
4782 if (dump_file)
4783 fprintf (dump_file, " The original is local and the only "
4784 "incoming edges from non-dead callers with nonzero "
4785 "counts are self-recursive, assuming it is cold.\n");
4786 /* The NEW_NODE count and counts of all its outgoing edges
4787 are still unmodified copies of ORIG_NODE's. Just clear
4788 the latter and bail out. */
4789 profile_count zero;
4790 if (opt_for_fn (orig_node->decl, flag_profile_partial_training))
4791 zero = profile_count::zero ().guessed_local ();
4792 else
4793 zero = profile_count::adjusted_zero ();
4794 orig_node->count = zero;
4795 for (cgraph_edge *cs = orig_node->callees;
4797 cs = cs->next_callee)
4798 cs->count = zero;
4799 for (cgraph_edge *cs = orig_node->indirect_calls;
4801 cs = cs->next_callee)
4802 cs->count = zero;
4803 return;
4806 else
4808 /* Let's behave as if there was another caller that accounts for all
4809 the calls that were either indirect or from other compilation
4810 units. */
4811 orig_nonrec_calls++;
4812 profile_count pretend_caller_count
4813 = (orig_node_count - new_sum - orig_nonrec_call_count
4814 - stats.rec_count_sum);
4815 orig_nonrec_call_count += pretend_caller_count;
4818 /* Divide all "unexplained" counts roughly proportionally to sums of
4819 counts of non-recursive calls.
4821 We put rather arbitrary limits on how many counts we claim because the
4822 sum of non-self-recursive incoming counts is only a rough guideline
4823 and there are cases (such as mcf) where using it blindly just takes
4824 too many. And if lattices are considered in the opposite order we
4825 could also take too few. */
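/* In other words, new_part below is the share of the unexplained counts
   proportional to new_sum relative to new_sum plus the original node's
   non-recursive count sum, clamped to at least new_nonrec_calls/limit_den
   and at most (limit_den - 1)/limit_den of the unexplained counts. */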
4826 profile_count unexp = orig_node_count - new_sum - orig_nonrec_call_count;
4828 int limit_den = 2 * (orig_nonrec_calls + new_nonrec_calls);
4829 profile_count new_part
4830 = MAX(MIN (unexp.apply_scale (new_sum,
4831 new_sum + orig_nonrec_call_count),
4832 unexp.apply_scale (limit_den - 1, limit_den)),
4833 unexp.apply_scale (new_nonrec_calls, limit_den));
4834 if (dump_file)
4836 fprintf (dump_file, " Claiming ");
4837 new_part.dump (dump_file);
4838 fprintf (dump_file, " of unexplained ");
4839 unexp.dump (dump_file);
4840 fprintf (dump_file, " counts because of self-recursive "
4841 "calls\n");
4843 new_sum += new_part;
4844 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4845 orig_node);
4847 else
4848 remainder = lenient_count_portion_handling (orig_node_count - new_sum,
4849 orig_node);
4851 new_sum = orig_node_count.combine_with_ipa_count (new_sum);
4852 new_node->count = new_sum;
4853 orig_node->count = remainder;
4855 profile_count orig_new_node_count = orig_node_count;
4856 profile_count::adjust_for_ipa_scaling (&new_sum, &orig_new_node_count);
4857 for (cgraph_edge *cs = new_node->callees; cs; cs = cs->next_callee)
4858 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4859 for (cgraph_edge *cs = new_node->indirect_calls; cs; cs = cs->next_callee)
4860 cs->count = cs->count.apply_scale (new_sum, orig_new_node_count);
4862 profile_count::adjust_for_ipa_scaling (&remainder, &orig_node_count);
4863 for (cgraph_edge *cs = orig_node->callees; cs; cs = cs->next_callee)
4864 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4865 for (cgraph_edge *cs = orig_node->indirect_calls; cs; cs = cs->next_callee)
4866 cs->count = cs->count.apply_scale (remainder, orig_node_count);
4868 if (dump_file)
4870 dump_profile_updates (new_node, true);
4871 dump_profile_updates (orig_node, false);
4875 /* Update the respective profile of specialized NEW_NODE and the original
4876 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4877 have been redirected to the specialized version. */
4879 static void
4880 update_specialized_profile (struct cgraph_node *new_node,
4881 struct cgraph_node *orig_node,
4882 profile_count redirected_sum)
4884 struct cgraph_edge *cs;
4885 profile_count new_node_count, orig_node_count = orig_node->count;
4887 if (dump_file)
4889 fprintf (dump_file, " the sum of counts of redirected edges is ");
4890 redirected_sum.dump (dump_file);
4891 fprintf (dump_file, "\n");
4893 if (!(orig_node_count > profile_count::zero ()))
4894 return;
4896 gcc_assert (orig_node_count >= redirected_sum);
4898 new_node_count = new_node->count;
4899 new_node->count += redirected_sum;
4900 orig_node->count -= redirected_sum;
4902 for (cs = new_node->callees; cs; cs = cs->next_callee)
4903 cs->count += cs->count.apply_scale (redirected_sum, new_node_count);
4905 for (cs = orig_node->callees; cs; cs = cs->next_callee)
4907 profile_count dec = cs->count.apply_scale (redirected_sum,
4908 orig_node_count);
4909 cs->count -= dec;
4912 if (dump_file)
4914 dump_profile_updates (new_node, true);
4915 dump_profile_updates (orig_node, false);
4919 static void adjust_references_in_caller (cgraph_edge *cs,
4920 symtab_node *symbol, int index);
4922 /* Simple structure to pass a symbol and index (with same meaning as parameters
4923 of adjust_references_in_caller) through a void* parameter of a
4924 call_for_symbol_thunks_and_aliases callback. */
4925 struct symbol_and_index_together
4927 symtab_node *symbol;
4928 int index;
4931 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4932 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4933 static bool
4934 adjust_refs_in_act_callers (struct cgraph_node *node, void *data)
4936 symbol_and_index_together *pack = (symbol_and_index_together *) data;
4937 for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller)
4938 if (!cs->caller->thunk)
4939 adjust_references_in_caller (cs, pack->symbol, pack->index);
4940 return false;
4943 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4944 variable which is only dereferenced and which is represented by SYMBOL. See
4945 if we can remove the ADDR reference in the callers associated with the call. */
4947 static void
4948 adjust_references_in_caller (cgraph_edge *cs, symtab_node *symbol, int index)
4950 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
4951 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, index);
4952 if (jfunc->type == IPA_JF_CONST)
4954 ipa_ref *to_del = cs->caller->find_reference (symbol, cs->call_stmt,
4955 cs->lto_stmt_uid);
4956 if (!to_del)
4957 return;
4958 to_del->remove_reference ();
4959 if (dump_file)
4960 fprintf (dump_file, " Removed a reference from %s to %s.\n",
4961 cs->caller->dump_name (), symbol->dump_name ());
4962 return;
4965 if (jfunc->type != IPA_JF_PASS_THROUGH
4966 || ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
4967 return;
4969 int fidx = ipa_get_jf_pass_through_formal_id (jfunc);
4970 cgraph_node *caller = cs->caller;
4971 ipa_node_params *caller_info = ipa_node_params_sum->get (caller);
4972 /* TODO: This consistency check may be too big and not really
4973 that useful. Consider removing it. */
4974 tree cst;
4975 if (caller_info->ipcp_orig_node)
4976 cst = caller_info->known_csts[fidx];
4977 else
4979 ipcp_lattice<tree> *lat = ipa_get_scalar_lat (caller_info, fidx);
4980 gcc_assert (lat->is_single_const ());
4981 cst = lat->values->value;
4983 gcc_assert (TREE_CODE (cst) == ADDR_EXPR
4984 && (symtab_node::get (get_base_address (TREE_OPERAND (cst, 0)))
4985 == symbol));
4987 int cuses = ipa_get_controlled_uses (caller_info, fidx);
4988 if (cuses == IPA_UNDESCRIBED_USE)
4989 return;
4990 gcc_assert (cuses > 0);
4991 cuses--;
4992 ipa_set_controlled_uses (caller_info, fidx, cuses);
4993 if (cuses)
4994 return;
4996 if (caller_info->ipcp_orig_node)
4998 /* Cloning machinery has created a reference here; we need to either
4999 remove it or change it to a read one. */
5000 ipa_ref *to_del = caller->find_reference (symbol, NULL, 0);
5001 if (to_del && to_del->use == IPA_REF_ADDR)
5003 to_del->remove_reference ();
5004 if (dump_file)
5005 fprintf (dump_file, " Removed a reference from %s to %s.\n",
5006 cs->caller->dump_name (), symbol->dump_name ());
5007 if (ipa_get_param_load_dereferenced (caller_info, fidx))
5009 caller->create_reference (symbol, IPA_REF_LOAD, NULL);
5010 if (dump_file)
5011 fprintf (dump_file,
5012 " ...and replaced it with LOAD one.\n");
5017 symbol_and_index_together pack;
5018 pack.symbol = symbol;
5019 pack.index = fidx;
5020 if (caller->can_change_signature)
5021 caller->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers,
5022 &pack, true);
5026 /* Return true if we would like to remove a parameter from NODE when cloning it
5027 with KNOWN_CSTS scalar constants. */
5029 static bool
5030 want_remove_some_param_p (cgraph_node *node, vec<tree> known_csts)
5032 auto_vec<bool, 16> surviving;
5033 bool filled_vec = false;
5034 ipa_node_params *info = ipa_node_params_sum->get (node);
5035 int i, count = ipa_get_param_count (info);
5037 for (i = 0; i < count; i++)
5039 if (!known_csts[i] && ipa_is_param_used (info, i))
5040 continue;
5042 if (!filled_vec)
5044 clone_info *info = clone_info::get (node);
5045 if (!info || !info->param_adjustments)
5046 return true;
5047 info->param_adjustments->get_surviving_params (&surviving);
5048 filled_vec = true;
5050 if (surviving.length() > (unsigned) i && surviving[i])
5051 return true;
5053 return false;
5056 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5057 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5058 redirect all edges in CALLERS to it. */
5060 static struct cgraph_node *
5061 create_specialized_node (struct cgraph_node *node,
5062 vec<tree> known_csts,
5063 vec<ipa_polymorphic_call_context> known_contexts,
5064 struct ipa_agg_replacement_value *aggvals,
5065 vec<cgraph_edge *> &callers)
5067 ipa_node_params *new_info, *info = ipa_node_params_sum->get (node);
5068 vec<ipa_replace_map *, va_gc> *replace_trees = NULL;
5069 vec<ipa_adjusted_param, va_gc> *new_params = NULL;
5070 struct ipa_agg_replacement_value *av;
5071 struct cgraph_node *new_node;
5072 int i, count = ipa_get_param_count (info);
5073 clone_info *cinfo = clone_info::get (node);
5074 ipa_param_adjustments *old_adjustments = cinfo
5075 ? cinfo->param_adjustments : NULL;
5076 ipa_param_adjustments *new_adjustments;
5077 gcc_assert (!info->ipcp_orig_node);
5078 gcc_assert (node->can_change_signature
5079 || !old_adjustments);
5081 if (old_adjustments)
5083 /* At the moment all IPA optimizations should use the number of
5084 parameters of the prevailing decl as the m_always_copy_start.
5085 Handling any other value would complicate the code below, so for the
5086 time being let's only assert it is so. */
5087 gcc_assert (old_adjustments->m_always_copy_start == count
5088 || old_adjustments->m_always_copy_start < 0);
5089 int old_adj_count = vec_safe_length (old_adjustments->m_adj_params);
5090 for (i = 0; i < old_adj_count; i++)
5092 ipa_adjusted_param *old_adj = &(*old_adjustments->m_adj_params)[i];
5093 if (!node->can_change_signature
5094 || old_adj->op != IPA_PARAM_OP_COPY
5095 || (!known_csts[old_adj->base_index]
5096 && ipa_is_param_used (info, old_adj->base_index)))
5098 ipa_adjusted_param new_adj = *old_adj;
5100 new_adj.prev_clone_adjustment = true;
5101 new_adj.prev_clone_index = i;
5102 vec_safe_push (new_params, new_adj);
5105 bool skip_return = old_adjustments->m_skip_return;
5106 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5107 ipa_param_adjustments (new_params, count,
5108 skip_return));
5110 else if (node->can_change_signature
5111 && want_remove_some_param_p (node, known_csts))
5113 ipa_adjusted_param adj;
5114 memset (&adj, 0, sizeof (adj));
5115 adj.op = IPA_PARAM_OP_COPY;
5116 for (i = 0; i < count; i++)
5117 if (!known_csts[i] && ipa_is_param_used (info, i))
5119 adj.base_index = i;
5120 adj.prev_clone_index = i;
5121 vec_safe_push (new_params, adj);
5123 new_adjustments = (new (ggc_alloc <ipa_param_adjustments> ())
5124 ipa_param_adjustments (new_params, count, false));
5126 else
5127 new_adjustments = NULL;
5129 auto_vec<cgraph_edge *, 2> self_recursive_calls;
5130 for (i = callers.length () - 1; i >= 0; i--)
5132 cgraph_edge *cs = callers[i];
5133 if (cs->caller == node)
5135 self_recursive_calls.safe_push (cs);
5136 callers.unordered_remove (i);
5139 replace_trees = cinfo ? vec_safe_copy (cinfo->tree_map) : NULL;
5140 for (i = 0; i < count; i++)
5142 tree t = known_csts[i];
5143 if (!t)
5144 continue;
5146 gcc_checking_assert (TREE_CODE (t) != TREE_BINFO);
5148 bool load_ref = false;
5149 symtab_node *ref_symbol;
5150 if (TREE_CODE (t) == ADDR_EXPR)
5152 tree base = get_base_address (TREE_OPERAND (t, 0));
5153 if (TREE_CODE (base) == VAR_DECL
5154 && ipa_get_controlled_uses (info, i) == 0
5155 && ipa_get_param_load_dereferenced (info, i)
5156 && (ref_symbol = symtab_node::get (base)))
5158 load_ref = true;
5159 if (node->can_change_signature)
5160 for (cgraph_edge *caller : callers)
5161 adjust_references_in_caller (caller, ref_symbol, i);
5165 ipa_replace_map *replace_map = get_replacement_map (info, t, i, load_ref);
5166 if (replace_map)
5167 vec_safe_push (replace_trees, replace_map);
5170 unsigned &suffix_counter = clone_num_suffixes->get_or_insert (
5171 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5172 node->decl)));
5173 new_node = node->create_virtual_clone (callers, replace_trees,
5174 new_adjustments, "constprop",
5175 suffix_counter);
5176 suffix_counter++;
5178 bool have_self_recursive_calls = !self_recursive_calls.is_empty ();
5179 for (unsigned j = 0; j < self_recursive_calls.length (); j++)
5181 cgraph_edge *cs = get_next_cgraph_edge_clone (self_recursive_calls[j]);
5182 /* Cloned edges can disappear during cloning as speculation can be
5183 resolved; check that we have one and that it comes from the last
5184 cloning. */
5185 if (cs && cs->caller == new_node)
5186 cs->redirect_callee_duplicating_thunks (new_node);
5187 /* Any future code that would make more than one clone of an outgoing
5188 edge would confuse this mechanism, so let's check that this does not
5189 happen. */
5190 gcc_checking_assert (!cs
5191 || !get_next_cgraph_edge_clone (cs)
5192 || get_next_cgraph_edge_clone (cs)->caller != new_node);
5194 if (have_self_recursive_calls)
5195 new_node->expand_all_artificial_thunks ();
5197 ipa_set_node_agg_value_chain (new_node, aggvals);
5198 for (av = aggvals; av; av = av->next)
5199 new_node->maybe_create_reference (av->value, NULL);
5201 if (dump_file && (dump_flags & TDF_DETAILS))
5203 fprintf (dump_file, " the new node is %s.\n", new_node->dump_name ());
5204 if (known_contexts.exists ())
5206 for (i = 0; i < count; i++)
5207 if (!known_contexts[i].useless_p ())
5209 fprintf (dump_file, " known ctx %i is ", i);
5210 known_contexts[i].dump (dump_file);
5213 if (aggvals)
5214 ipa_dump_agg_replacement_values (dump_file, aggvals);
5217 new_info = ipa_node_params_sum->get (new_node);
5218 new_info->ipcp_orig_node = node;
5219 new_node->ipcp_clone = true;
5220 new_info->known_csts = known_csts;
5221 new_info->known_contexts = known_contexts;
5223 ipcp_discover_new_direct_edges (new_node, known_csts, known_contexts, aggvals);
5225 return new_node;
5228 /* Return true if JFUNC, which describes the i-th parameter of call CS, is a
5229 pass-through to the function itself when the cgraph_node involved is not an
5230 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5231 no-operation pass-through. */
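/* For instance, in

     fn (int i)
     {
       ...
       fn (i);
     }

   the jump function describing the argument of the recursive call is a
   simple no-operation pass-through of parameter 0 to itself. */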
5233 static bool
5234 self_recursive_pass_through_p (cgraph_edge *cs, ipa_jump_func *jfunc, int i,
5235 bool simple = true)
5237 enum availability availability;
5238 if (cs->caller == cs->callee->function_symbol (&availability)
5239 && availability > AVAIL_INTERPOSABLE
5240 && jfunc->type == IPA_JF_PASS_THROUGH
5241 && (!simple || ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5242 && ipa_get_jf_pass_through_formal_id (jfunc) == i
5243 && ipa_node_params_sum->get (cs->caller)
5244 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5245 return true;
5246 return false;
5249 /* Return true if JFUNC, which describes a part of an aggregate represented or
5250 pointed to by the i-th parameter of call CS, is a pass-through to the
5251 function itself when the cgraph_node involved is not an IPA-CP clone. When
5252 SIMPLE is true, further check if JFUNC is a simple no-operation
5253 pass-through. */
5255 static bool
5256 self_recursive_agg_pass_through_p (cgraph_edge *cs, ipa_agg_jf_item *jfunc,
5257 int i, bool simple = true)
5259 enum availability availability;
5260 if (cs->caller == cs->callee->function_symbol (&availability)
5261 && availability > AVAIL_INTERPOSABLE
5262 && jfunc->jftype == IPA_JF_LOAD_AGG
5263 && jfunc->offset == jfunc->value.load_agg.offset
5264 && (!simple || jfunc->value.pass_through.operation == NOP_EXPR)
5265 && jfunc->value.pass_through.formal_id == i
5266 && useless_type_conversion_p (jfunc->value.load_agg.type, jfunc->type)
5267 && ipa_node_params_sum->get (cs->caller)
5268 && !ipa_node_params_sum->get (cs->caller)->ipcp_orig_node)
5269 return true;
5270 return false;
5273 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5274 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5276 static void
5277 find_more_scalar_values_for_callers_subset (struct cgraph_node *node,
5278 vec<tree> &known_csts,
5279 const vec<cgraph_edge *> &callers)
5281 ipa_node_params *info = ipa_node_params_sum->get (node);
5282 int i, count = ipa_get_param_count (info);
5284 for (i = 0; i < count; i++)
5286 struct cgraph_edge *cs;
5287 tree newval = NULL_TREE;
5288 int j;
5289 bool first = true;
5290 tree type = ipa_get_type (info, i);
5292 if (ipa_get_scalar_lat (info, i)->bottom || known_csts[i])
5293 continue;
5295 FOR_EACH_VEC_ELT (callers, j, cs)
5297 struct ipa_jump_func *jump_func;
5298 tree t;
5300 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5301 if (!args
5302 || i >= ipa_get_cs_argument_count (args)
5303 || (i == 0
5304 && call_passes_through_thunk (cs)))
5306 newval = NULL_TREE;
5307 break;
5309 jump_func = ipa_get_ith_jump_func (args, i);
5311 /* Besides a simple pass-through jump function, an arithmetic jump
5312 function can also introduce an argument-direct-pass-through for a
5313 self-feeding recursive call. For example,
5315 fn (int i)
5317 fn (i & 1);
5320 Given that i is 0, recursive propagation via (i & 1) also gets
5321 0. */
5322 if (self_recursive_pass_through_p (cs, jump_func, i, false))
5324 gcc_assert (newval);
5325 t = ipa_get_jf_arith_result (
5326 ipa_get_jf_pass_through_operation (jump_func),
5327 newval,
5328 ipa_get_jf_pass_through_operand (jump_func),
5329 type);
5331 else
5332 t = ipa_value_from_jfunc (ipa_node_params_sum->get (cs->caller),
5333 jump_func, type);
5334 if (!t
5335 || (newval
5336 && !values_equal_for_ipcp_p (t, newval))
5337 || (!first && !newval))
5339 newval = NULL_TREE;
5340 break;
5342 else
5343 newval = t;
5344 first = false;
5347 if (newval)
5349 if (dump_file && (dump_flags & TDF_DETAILS))
5351 fprintf (dump_file, " adding an extra known scalar value ");
5352 print_ipcp_constant_value (dump_file, newval);
5353 fprintf (dump_file, " for ");
5354 ipa_dump_param (dump_file, info, i);
5355 fprintf (dump_file, "\n");
5358 known_csts[i] = newval;
5363 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5364 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5365 CALLERS. */
5367 static void
5368 find_more_contexts_for_caller_subset (cgraph_node *node,
5369 vec<ipa_polymorphic_call_context>
5370 *known_contexts,
5371 const vec<cgraph_edge *> &callers)
5373 ipa_node_params *info = ipa_node_params_sum->get (node);
5374 int i, count = ipa_get_param_count (info);
5376 for (i = 0; i < count; i++)
5378 cgraph_edge *cs;
5380 if (ipa_get_poly_ctx_lat (info, i)->bottom
5381 || (known_contexts->exists ()
5382 && !(*known_contexts)[i].useless_p ()))
5383 continue;
5385 ipa_polymorphic_call_context newval;
5386 bool first = true;
5387 int j;
5389 FOR_EACH_VEC_ELT (callers, j, cs)
5391 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5392 if (!args
5393 || i >= ipa_get_cs_argument_count (args))
5394 return;
5395 ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
5396 ipa_polymorphic_call_context ctx;
5397 ctx = ipa_context_from_jfunc (ipa_node_params_sum->get (cs->caller),
5398 cs, i, jfunc);
5399 if (first)
5401 newval = ctx;
5402 first = false;
5404 else
5405 newval.meet_with (ctx);
5406 if (newval.useless_p ())
5407 break;
5410 if (!newval.useless_p ())
5412 if (dump_file && (dump_flags & TDF_DETAILS))
5414 fprintf (dump_file, " adding an extra known polymorphic "
5415 "context ");
5416 print_ipcp_constant_value (dump_file, newval);
5417 fprintf (dump_file, " for ");
5418 ipa_dump_param (dump_file, info, i);
5419 fprintf (dump_file, "\n");
5422 if (!known_contexts->exists ())
5423 known_contexts->safe_grow_cleared (ipa_get_param_count (info),
5424 true);
5425 (*known_contexts)[i] = newval;
5431 /* Go through PLATS and create a vector of values consisting of values and
5432 offsets (minus OFFSET) of lattices that contain only a single value. */
5434 static vec<ipa_agg_value>
5435 copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset)
5437 vec<ipa_agg_value> res = vNULL;
5439 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5440 return vNULL;
5442 for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next)
5443 if (aglat->is_single_const ())
5445 struct ipa_agg_value ti;
5446 ti.offset = aglat->offset - offset;
5447 ti.value = aglat->values->value;
5448 res.safe_push (ti);
5450 return res;
5453 /* Intersect all values in INTER with single value lattices in PLATS (while
5454 subtracting OFFSET). */
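/* Note that values which do not survive an intersection are not removed from
   the vector; they are merely marked by setting their value to NULL_TREE and
   subsequent intersections simply skip such items. */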
5456 static void
5457 intersect_with_plats (class ipcp_param_lattices *plats,
5458 vec<ipa_agg_value> *inter,
5459 HOST_WIDE_INT offset)
5461 struct ipcp_agg_lattice *aglat;
5462 struct ipa_agg_value *item;
5463 int k;
5465 if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom)
5467 inter->release ();
5468 return;
5471 aglat = plats->aggs;
5472 FOR_EACH_VEC_ELT (*inter, k, item)
5474 bool found = false;
5475 if (!item->value)
5476 continue;
5477 while (aglat)
5479 if (aglat->offset - offset > item->offset)
5480 break;
5481 if (aglat->offset - offset == item->offset)
5483 if (aglat->is_single_const ())
5485 tree value = aglat->values->value;
5487 if (values_equal_for_ipcp_p (item->value, value))
5488 found = true;
5490 break;
5492 aglat = aglat->next;
5494 if (!found)
5495 item->value = NULL_TREE;
5499 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5500 vector result while subtracting OFFSET from the individual value offsets. */
5502 static vec<ipa_agg_value>
5503 agg_replacements_to_vector (struct cgraph_node *node, int index,
5504 HOST_WIDE_INT offset)
5506 struct ipa_agg_replacement_value *av;
5507 vec<ipa_agg_value> res = vNULL;
5509 for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next)
5510 if (av->index == index
5511 && (av->offset - offset) >= 0)
5513 struct ipa_agg_value item;
5514 gcc_checking_assert (av->value);
5515 item.offset = av->offset - offset;
5516 item.value = av->value;
5517 res.safe_push (item);
5520 return res;
5523 /* Intersect all values in INTER with those that we have already scheduled to
5524 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5525 (while subtracting OFFSET). */
5527 static void
5528 intersect_with_agg_replacements (struct cgraph_node *node, int index,
5529 vec<ipa_agg_value> *inter,
5530 HOST_WIDE_INT offset)
5532 struct ipa_agg_replacement_value *srcvals;
5533 struct ipa_agg_value *item;
5534 int i;
5536 srcvals = ipa_get_agg_replacements_for_node (node);
5537 if (!srcvals)
5539 inter->release ();
5540 return;
5543 FOR_EACH_VEC_ELT (*inter, i, item)
5545 struct ipa_agg_replacement_value *av;
5546 bool found = false;
5547 if (!item->value)
5548 continue;
5549 for (av = srcvals; av; av = av->next)
5551 gcc_checking_assert (av->value);
5552 if (av->index == index
5553 && av->offset - offset == item->offset)
5555 if (values_equal_for_ipcp_p (item->value, av->value))
5556 found = true;
5557 break;
5560 if (!found)
5561 item->value = NULL_TREE;
5565 /* Intersect values in INTER with aggregate values that come along edge CS to
5566 parameter number INDEX and return it. If INTER does not actually exist yet,
5567 copy all incoming values to it. If we determine we ended up with no values
5568 whatsoever, return a released vector. */
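/* The function distinguishes three sources of values: simple pass-through
   jump functions, for which the caller's lattices or already scheduled
   aggregate replacements are intersected; ancestor jump functions with
   preserved aggregates, which are handled the same way except that the
   ancestor offset is subtracted; and aggregate values tracked directly in
   the jump function itself. */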
5570 static vec<ipa_agg_value>
5571 intersect_aggregates_with_edge (struct cgraph_edge *cs, int index,
5572 vec<ipa_agg_value> inter)
5574 struct ipa_jump_func *jfunc;
5575 jfunc = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), index);
5576 if (jfunc->type == IPA_JF_PASS_THROUGH
5577 && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
5579 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5580 int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
5582 if (caller_info->ipcp_orig_node)
5584 struct cgraph_node *orig_node = caller_info->ipcp_orig_node;
5585 class ipcp_param_lattices *orig_plats;
5586 ipa_node_params *orig_info = ipa_node_params_sum->get (orig_node);
5587 orig_plats = ipa_get_parm_lattices (orig_info, src_idx);
5588 if (agg_pass_through_permissible_p (orig_plats, jfunc))
5590 if (!inter.exists ())
5591 inter = agg_replacements_to_vector (cs->caller, src_idx, 0);
5592 else
5593 intersect_with_agg_replacements (cs->caller, src_idx,
5594 &inter, 0);
5595 return inter;
5598 else
5600 class ipcp_param_lattices *src_plats;
5601 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5602 if (agg_pass_through_permissible_p (src_plats, jfunc))
5604 /* Currently we do not produce clobber aggregate jump
5605 functions, adjust when we do. */
5606 gcc_checking_assert (!jfunc->agg.items);
5607 if (!inter.exists ())
5608 inter = copy_plats_to_inter (src_plats, 0);
5609 else
5610 intersect_with_plats (src_plats, &inter, 0);
5611 return inter;
5615 else if (jfunc->type == IPA_JF_ANCESTOR
5616 && ipa_get_jf_ancestor_agg_preserved (jfunc))
5618 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5619 int src_idx = ipa_get_jf_ancestor_formal_id (jfunc);
5620 class ipcp_param_lattices *src_plats;
5621 HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc);
5623 if (caller_info->ipcp_orig_node)
5625 if (!inter.exists ())
5626 inter = agg_replacements_to_vector (cs->caller, src_idx, delta);
5627 else
5628 intersect_with_agg_replacements (cs->caller, src_idx, &inter,
5629 delta);
5631 else
5633 src_plats = ipa_get_parm_lattices (caller_info, src_idx);
5634 /* Currently we do not produce clobber aggregate jump
5635 functions, adjust when we do. */
5636 gcc_checking_assert (!src_plats->aggs || !jfunc->agg.items);
5637 if (!inter.exists ())
5638 inter = copy_plats_to_inter (src_plats, delta);
5639 else
5640 intersect_with_plats (src_plats, &inter, delta);
5642 return inter;
5645 if (jfunc->agg.items)
5647 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
5648 struct ipa_agg_value *item;
5649 int k;
5651 if (!inter.exists ())
5652 for (unsigned i = 0; i < jfunc->agg.items->length (); i++)
5654 struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i];
5655 tree value = ipa_agg_value_from_node (caller_info, cs->caller,
5656 agg_item);
5657 if (value)
5659 struct ipa_agg_value agg_value;
5661 agg_value.value = value;
5662 agg_value.offset = agg_item->offset;
5663 inter.safe_push (agg_value);
5666 else
5667 FOR_EACH_VEC_ELT (inter, k, item)
5669 int l = 0;
5670 bool found = false;
5672 if (!item->value)
5673 continue;
5675 while ((unsigned) l < jfunc->agg.items->length ())
5677 struct ipa_agg_jf_item *ti;
5678 ti = &(*jfunc->agg.items)[l];
5679 if (ti->offset > item->offset)
5680 break;
5681 if (ti->offset == item->offset)
5683 tree value;
5685 /* Besides a simple pass-through aggregate jump function,
5686 an arithmetic aggregate jump function can also bring the
5687 same aggregate value as the passed-in parameter of a
5688 self-feeding recursive call. For example,
5690 fn (int *i)
5691 {
5692 int j = *i & 1;
5693 fn (&j);
5694 }
5696 Given that *i is 0, recursive propagation via (*i & 1)
5697 also gets 0. */
5698 if (self_recursive_agg_pass_through_p (cs, ti, index,
5699 false))
5700 value = ipa_get_jf_arith_result (
5701 ti->value.pass_through.operation,
5702 item->value,
5703 ti->value.pass_through.operand,
5704 ti->type);
5705 else
5706 value = ipa_agg_value_from_node (caller_info,
5707 cs->caller, ti);
5709 if (value && values_equal_for_ipcp_p (item->value, value))
5710 found = true;
5711 break;
5713 l++;
5715 if (!found)
5716 item->value = NULL;
5719 else
5721 inter.release ();
5722 return vNULL;
5724 return inter;
5727 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5728 from all of them. */
5730 static struct ipa_agg_replacement_value *
5731 find_aggregate_values_for_callers_subset (struct cgraph_node *node,
5732 const vec<cgraph_edge *> &callers)
5734 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5735 struct ipa_agg_replacement_value *res;
5736 struct ipa_agg_replacement_value **tail = &res;
5737 struct cgraph_edge *cs;
5738 int i, j, count = ipa_get_param_count (dest_info);
5740 FOR_EACH_VEC_ELT (callers, j, cs)
5742 ipa_edge_args *args = ipa_edge_args_sum->get (cs);
5743 if (!args)
5745 count = 0;
5746 break;
5748 int c = ipa_get_cs_argument_count (args);
5749 if (c < count)
5750 count = c;
5753 for (i = 0; i < count; i++)
5755 struct cgraph_edge *cs;
5756 vec<ipa_agg_value> inter = vNULL;
5757 struct ipa_agg_value *item;
5758 class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i);
5759 int j;
5761 /* Among other things, the following check should deal with all by_ref
5762 mismatches. */
5763 if (plats->aggs_bottom)
5764 continue;
5766 FOR_EACH_VEC_ELT (callers, j, cs)
5768 struct ipa_jump_func *jfunc
5769 = ipa_get_ith_jump_func (ipa_edge_args_sum->get (cs), i);
5770 if (self_recursive_pass_through_p (cs, jfunc, i)
5771 && (!plats->aggs_by_ref
5772 || ipa_get_jf_pass_through_agg_preserved (jfunc)))
5773 continue;
5774 inter = intersect_aggregates_with_edge (cs, i, inter);
5776 if (!inter.exists ())
5777 goto next_param;
5780 FOR_EACH_VEC_ELT (inter, j, item)
5782 struct ipa_agg_replacement_value *v;
5784 if (!item->value)
5785 continue;
5787 v = ggc_alloc<ipa_agg_replacement_value> ();
5788 v->index = i;
5789 v->offset = item->offset;
5790 v->value = item->value;
5791 v->by_ref = plats->aggs_by_ref;
5792 *tail = v;
5793 tail = &v->next;
5796 next_param:
5797 if (inter.exists ())
5798 inter.release ();
5800 *tail = NULL;
5801 return res;
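/* A sketch of what the collection above achieves, on a hypothetical example
   (run, square and the structure are made up for illustration):

     struct S { int n; int (*cb) (int); };
     static int run (struct S *s) { return s->cb (s->n); }

     int square (int i) { return i * i; }
     struct S a = { 4, square };
     struct S b = { 4, square };
     ... run (&a) ... run (&b) ...

   Both edges agree on the aggregate contents of *s, so the returned chain
   contains replacements stating that, in a specialized clone of run, s->n is
   4 and s->cb is square, which in turn can let the indirect call be turned
   into a direct (and potentially inlinable) one.  */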
5804 /* Determine whether CS also brings all scalar values that NODE is
5805 specialized for. */
5807 static bool
5808 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
5809 struct cgraph_node *node)
5811 ipa_node_params *dest_info = ipa_node_params_sum->get (node);
5812 int count = ipa_get_param_count (dest_info);
5813 class ipa_node_params *caller_info;
5814 class ipa_edge_args *args;
5815 int i;
5817 caller_info = ipa_node_params_sum->get (cs->caller);
5818 args = ipa_edge_args_sum->get (cs);
5819 for (i = 0; i < count; i++)
5821 struct ipa_jump_func *jump_func;
5822 tree val, t;
5824 val = dest_info->known_csts[i];
5825 if (!val)
5826 continue;
5828 if (i >= ipa_get_cs_argument_count (args))
5829 return false;
5830 jump_func = ipa_get_ith_jump_func (args, i);
5831 t = ipa_value_from_jfunc (caller_info, jump_func,
5832 ipa_get_type (dest_info, i));
5833 if (!t || !values_equal_for_ipcp_p (val, t))
5834 return false;
5836 return true;
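/* For illustration (a hypothetical case): if the clone NODE is specialized
   for known_csts = { 5, NULL }, then in

     extern int f (int a, int b);
     int g (int x) { return f (5, x); }
     int h (void) { return f (6, 3); }

   the edge from g brings all required scalars, because the second parameter
   is not specialized for at all, while the edge from h does not, since
   6 != 5 for parameter 0.  */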
5839 /* Determine whether CS also brings all aggregate values that NODE is
5840 specialized for. */
5841 static bool
5842 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
5843 struct cgraph_node *node)
5845 struct ipa_agg_replacement_value *aggval;
5846 int i, ec, count;
5848 aggval = ipa_get_agg_replacements_for_node (node);
5849 if (!aggval)
5850 return true;
5852 ipa_node_params *clone_node_info = ipa_node_params_sum->get (node);
5853 count = ipa_get_param_count (clone_node_info);
5854 ec = ipa_get_cs_argument_count (ipa_edge_args_sum->get (cs));
5855 if (ec < count)
5856 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5857 if (av->index >= ec)
5858 return false;
5860 ipa_node_params *orig_node_info
5861 = ipa_node_params_sum->get (clone_node_info->ipcp_orig_node);
5863 for (i = 0; i < count; i++)
5865 class ipcp_param_lattices *plats;
5866 bool interesting = false;
5867 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5868 if (av->index == i)
5870 interesting = true;
5871 break;
5873 if (!interesting)
5874 continue;
5876 plats = ipa_get_parm_lattices (orig_node_info, i);
5877 if (plats->aggs_bottom)
5878 return false;
5880 vec<ipa_agg_value> values = intersect_aggregates_with_edge (cs, i, vNULL);
5881 if (!values.exists ())
5882 return false;
5884 for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next)
5885 if (av->index == i)
5887 struct ipa_agg_value *item;
5888 int j;
5889 bool found = false;
5890 FOR_EACH_VEC_ELT (values, j, item)
5891 if (item->value
5892 && item->offset == av->offset
5893 && values_equal_for_ipcp_p (item->value, av->value))
5895 found = true;
5896 break;
5898 if (!found)
5900 values.release ();
5901 return false;
5904 values.release ();
5906 return true;
5909 /* Given an original NODE and a VAL for which we have already created a
5910 specialized clone, look whether there are incoming edges that still lead
5911 into the old node but now also bring the requested value and also conform to
5912 all other criteria such that they can be redirected to the specialized node.
5913 This function can therefore redirect the final edge in an SCC. */
5915 template <typename valtype>
5916 static void
5917 perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
5919 ipcp_value_source<valtype> *src;
5920 profile_count redirected_sum = profile_count::zero ();
5922 for (src = val->sources; src; src = src->next)
5924 struct cgraph_edge *cs = src->cs;
5925 while (cs)
5927 if (cgraph_edge_brings_value_p (cs, src, node, val)
5928 && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
5929 && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
5931 if (dump_file)
5932 fprintf (dump_file, " - adding an extra caller %s of %s\n",
5933 cs->caller->dump_name (),
5934 val->spec_node->dump_name ());
5936 cs->redirect_callee_duplicating_thunks (val->spec_node);
5937 val->spec_node->expand_all_artificial_thunks ();
5938 if (cs->count.ipa ().initialized_p ())
5939 redirected_sum = redirected_sum + cs->count.ipa ();
5941 cs = get_next_cgraph_edge_clone (cs);
5945 if (redirected_sum.nonzero_p ())
5946 update_specialized_profile (val->spec_node, node, redirected_sum);
5949 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5951 static bool
5952 known_contexts_useful_p (vec<ipa_polymorphic_call_context> known_contexts)
5954 ipa_polymorphic_call_context *ctx;
5955 int i;
5957 FOR_EACH_VEC_ELT (known_contexts, i, ctx)
5958 if (!ctx->useless_p ())
5959 return true;
5960 return false;
5963 /* Return a copy of KNOWN_CONTEXTS if it is useful, otherwise return vNULL. */
5965 static vec<ipa_polymorphic_call_context>
5966 copy_useful_known_contexts (const vec<ipa_polymorphic_call_context> &known_contexts)
5968 if (known_contexts_useful_p (known_contexts))
5969 return known_contexts.copy ();
5970 else
5971 return vNULL;
5974 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5975 according to VAL and INDEX. If the known contexts are useful, store a copy
5976 of them in KNOWN_CONTEXTS too. */
5978 static void
5979 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5980 vec<tree> *known_csts,
5981 vec<ipa_polymorphic_call_context> *known_contexts,
5982 ipcp_value<tree> *val, int index)
5984 *known_csts = avals->m_known_vals.copy ();
5985 *known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
5986 (*known_csts)[index] = val->value;
5989 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5990 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5991 INDEX. */
5993 static void
5994 copy_known_vectors_add_val (ipa_auto_call_arg_values *avals,
5995 vec<tree> *known_csts,
5996 vec<ipa_polymorphic_call_context> *known_contexts,
5997 ipcp_value<ipa_polymorphic_call_context> *val,
5998 int index)
6000 *known_csts = avals->m_known_vals.copy ();
6001 *known_contexts = avals->m_known_contexts.copy ();
6002 (*known_contexts)[index] = val->value;
6005 /* Return true if OFFSET indicates this was not an aggregate value or there is
6006 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
6007 AGGVALS list. */
6009 DEBUG_FUNCTION bool
6010 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *aggvals,
6011 int index, HOST_WIDE_INT offset, tree value)
6013 if (offset == -1)
6014 return true;
6016 while (aggvals)
6018 if (aggvals->index == index
6019 && aggvals->offset == offset
6020 && values_equal_for_ipcp_p (aggvals->value, value))
6021 return true;
6022 aggvals = aggvals->next;
6024 return false;
6027 /* Return true iff OFFSET is minus one; the source of a polymorphic call
6028 context can never be an aggregate value. */
6030 DEBUG_FUNCTION bool
6031 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value *,
6032 int, HOST_WIDE_INT offset,
6033 ipa_polymorphic_call_context)
6035 return offset == -1;
6038 /* Decide whether to create a special version of NODE for value VAL of
6039 parameter at the given INDEX. If OFFSET is -1, the value is for the
6040 parameter itself, otherwise it is stored at the given OFFSET of the
6041 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6042 is a vector which contains clones created for self-recursive calls with an
6043 arithmetic pass-through jump function. */
6045 template <typename valtype>
6046 static bool
6047 decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
6048 ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
6049 vec<cgraph_node *> *self_gen_clones)
6051 struct ipa_agg_replacement_value *aggvals;
6052 int caller_count;
6053 sreal freq_sum;
6054 profile_count count_sum, rec_count_sum;
6055 vec<cgraph_edge *> callers;
6057 if (val->spec_node)
6059 perhaps_add_new_callers (node, val);
6060 return false;
6062 else if (val->local_size_cost + overall_size > get_max_overall_size (node))
6064 if (dump_file && (dump_flags & TDF_DETAILS))
6065 fprintf (dump_file, " Ignoring candidate value because "
6066 "maximum unit size would be reached with %li.\n",
6067 val->local_size_cost + overall_size);
6068 return false;
6070 else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
6071 &rec_count_sum, &count_sum))
6072 return false;
6074 if (!dbg_cnt (ipa_cp_values))
6075 return false;
6077 if (val->self_recursion_generated_p ())
6079 /* The edge counts in this case might not have been adjusted yet.
6080 Nevertheless, even if they were, it would only be guesswork, which we
6081 can do just as well now. The recursive part of the counts can be
6082 derived from the count of the original node anyway. */
6083 if (node->count.ipa ().nonzero_p ())
6085 unsigned dem = self_gen_clones->length () + 1;
6086 rec_count_sum = node->count.ipa ().apply_scale (1, dem);
6088 else
6089 rec_count_sum = profile_count::zero ();
6092 /* get_info_about_necessary_edges only sums up ipa counts. */
6093 count_sum += rec_count_sum;
6095 if (dump_file && (dump_flags & TDF_DETAILS))
6097 fprintf (dump_file, " - considering value ");
6098 print_ipcp_constant_value (dump_file, val->value);
6099 fprintf (dump_file, " for ");
6100 ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
6101 if (offset != -1)
6102 fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
6103 fprintf (dump_file, " (caller_count: %i)\n", caller_count);
6106 if (!good_cloning_opportunity_p (node, val->local_time_benefit,
6107 freq_sum, count_sum,
6108 val->local_size_cost)
6109 && !good_cloning_opportunity_p (node, val->prop_time_benefit,
6110 freq_sum, count_sum, val->prop_size_cost))
6111 return false;
6113 if (dump_file)
6114 fprintf (dump_file, " Creating a specialized node of %s.\n",
6115 node->dump_name ());
6117 vec<tree> known_csts;
6118 vec<ipa_polymorphic_call_context> known_contexts;
6120 callers = gather_edges_for_value (val, node, caller_count);
6121 if (offset == -1)
6122 copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
6123 else
6125 known_csts = avals->m_known_vals.copy ();
6126 known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
6128 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6129 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6130 aggvals = find_aggregate_values_for_callers_subset (node, callers);
6131 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
6132 offset, val->value));
6133 val->spec_node = create_specialized_node (node, known_csts, known_contexts,
6134 aggvals, callers);
6136 if (val->self_recursion_generated_p ())
6137 self_gen_clones->safe_push (val->spec_node);
6138 else
6139 update_profiling_info (node, val->spec_node);
6141 callers.release ();
6142 overall_size += val->local_size_cost;
6143 if (dump_file && (dump_flags & TDF_DETAILS))
6144 fprintf (dump_file, " overall size reached %li\n",
6145 overall_size);
6147 /* TODO: If for some lattice there is only one other known value
6148 left, make a special node for it too. */
6150 return true;
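/* A simplified, hypothetical illustration of the self-recursion path handled
   above:

     static int sum (int i) { return i ? i + sum (i - 1) : 0; }
     ... sum (3) ...

   The arithmetic pass-through jump function i - 1 on the recursive edge
   generates the further values 2, 1 and 0 from the known 3;
   decide_about_value is invoked for each value and the clones made for the
   self-generated ones are gathered in SELF_GEN_CLONES so that their profile
   counts can be distributed together afterwards.  */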
6153 /* Decide whether and what specialized clones of NODE should be created. */
6155 static bool
6156 decide_whether_version_node (struct cgraph_node *node)
6158 ipa_node_params *info = ipa_node_params_sum->get (node);
6159 int i, count = ipa_get_param_count (info);
6160 bool ret = false;
6162 if (count == 0)
6163 return false;
6165 if (dump_file && (dump_flags & TDF_DETAILS))
6166 fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
6167 node->dump_name ());
6169 auto_vec <cgraph_node *, 9> self_gen_clones;
6170 ipa_auto_call_arg_values avals;
6171 gather_context_independent_values (info, &avals, false, NULL);
6173 for (i = 0; i < count; i++)
6175 class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6176 ipcp_lattice<tree> *lat = &plats->itself;
6177 ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;
6179 if (!lat->bottom
6180 && !avals.m_known_vals[i])
6182 ipcp_value<tree> *val;
6183 for (val = lat->values; val; val = val->next)
6185 /* If some values generated for self-recursive calls with
6186 arithmetic jump functions fall outside of the known
6187 value_range for the parameter, we can skip them. The VR
6188 interface supports this only for integers now. */
6189 if (TREE_CODE (val->value) == INTEGER_CST
6190 && !plats->m_value_range.bottom_p ()
6191 && !plats->m_value_range.m_vr.contains_p (val->value))
6193 /* This can happen also if a constant present in the source
6194 code falls outside of the range of the parameter's type, so we
6195 cannot assert. */
6196 if (dump_file && (dump_flags & TDF_DETAILS))
6198 fprintf (dump_file, " - skipping%s value ",
6199 val->self_recursion_generated_p ()
6200 ? " self_recursion_generated" : "");
6201 print_ipcp_constant_value (dump_file, val->value);
6202 fprintf (dump_file, " because it is outside known "
6203 "value range.\n");
6205 continue;
6207 ret |= decide_about_value (node, i, -1, val, &avals,
6208 &self_gen_clones);
6212 if (!plats->aggs_bottom)
6214 struct ipcp_agg_lattice *aglat;
6215 ipcp_value<tree> *val;
6216 for (aglat = plats->aggs; aglat; aglat = aglat->next)
6217 if (!aglat->bottom && aglat->values
6218 /* If the following is false, the one value has been considered
6219 for cloning for all contexts. */
6220 && (plats->aggs_contain_variable
6221 || !aglat->is_single_const ()))
6222 for (val = aglat->values; val; val = val->next)
6223 ret |= decide_about_value (node, i, aglat->offset, val, &avals,
6224 &self_gen_clones);
6227 if (!ctxlat->bottom
6228 && avals.m_known_contexts[i].useless_p ())
6230 ipcp_value<ipa_polymorphic_call_context> *val;
6231 for (val = ctxlat->values; val; val = val->next)
6232 ret |= decide_about_value (node, i, -1, val, &avals,
6233 &self_gen_clones);
6237 if (!self_gen_clones.is_empty ())
6239 self_gen_clones.safe_push (node);
6240 update_counts_for_self_gen_clones (node, self_gen_clones);
6243 if (info->do_clone_for_all_contexts)
6245 if (!dbg_cnt (ipa_cp_values))
6247 info->do_clone_for_all_contexts = false;
6248 return ret;
6251 struct cgraph_node *clone;
6252 auto_vec<cgraph_edge *> callers = node->collect_callers ();
6254 for (int i = callers.length () - 1; i >= 0; i--)
6256 cgraph_edge *cs = callers[i];
6257 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6259 if (caller_info && caller_info->node_dead)
6260 callers.unordered_remove (i);
6263 if (!adjust_callers_for_value_intersection (callers, node))
6265 /* If node is not called by anyone, or all its caller edges are
6266 self-recursive, the node is not really in use; there is no need
6267 to do any cloning. */
6268 info->do_clone_for_all_contexts = false;
6269 return ret;
6272 if (dump_file)
6273 fprintf (dump_file, " - Creating a specialized node of %s "
6274 "for all known contexts.\n", node->dump_name ());
6276 vec<tree> known_csts = avals.m_known_vals.copy ();
6277 vec<ipa_polymorphic_call_context> known_contexts
6278 = copy_useful_known_contexts (avals.m_known_contexts);
6279 find_more_scalar_values_for_callers_subset (node, known_csts, callers);
6280 find_more_contexts_for_caller_subset (node, &known_contexts, callers);
6281 ipa_agg_replacement_value *aggvals
6282 = find_aggregate_values_for_callers_subset (node, callers);
6284 if (!known_contexts_useful_p (known_contexts))
6286 known_contexts.release ();
6287 known_contexts = vNULL;
6289 clone = create_specialized_node (node, known_csts, known_contexts,
6290 aggvals, callers);
6291 info->do_clone_for_all_contexts = false;
6292 ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
6293 ret = true;
6296 return ret;
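/* The do_clone_for_all_contexts path above covers, among other things, the
   case in which the useful information is the same in every context, for
   instance in the hypothetical

     static int f (int x) { return x * 2; }
     int g (void) { return f (3); }
     int h (void) { return f (3); }

   Every caller passes 3, so rather than cloning per value, a single clone
   specialized for all known contexts is created and all viable (non-dead,
   non-self-recursive) callers are redirected to it.  */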
6299 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6301 static void
6302 spread_undeadness (struct cgraph_node *node)
6304 struct cgraph_edge *cs;
6306 for (cs = node->callees; cs; cs = cs->next_callee)
6307 if (ipa_edge_within_scc (cs))
6309 struct cgraph_node *callee;
6310 class ipa_node_params *info;
6312 callee = cs->callee->function_symbol (NULL);
6313 info = ipa_node_params_sum->get (callee);
6315 if (info && info->node_dead)
6317 info->node_dead = 0;
6318 spread_undeadness (callee);
6323 /* Return true if NODE has a caller from outside of its SCC that is not
6324 dead. Worker callback for cgraph_node::call_for_symbol_thunks_and_aliases. */
6326 static bool
6327 has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
6328 void *data ATTRIBUTE_UNUSED)
6330 struct cgraph_edge *cs;
6332 for (cs = node->callers; cs; cs = cs->next_caller)
6333 if (cs->caller->thunk
6334 && cs->caller->call_for_symbol_thunks_and_aliases
6335 (has_undead_caller_from_outside_scc_p, NULL, true))
6336 return true;
6337 else if (!ipa_edge_within_scc (cs))
6339 ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
6340 if (!caller_info /* Unoptimized callers are like dead ones. */
6341 || !caller_info->node_dead)
6342 return true;
6344 return false;
6348 /* Identify nodes within the same SCC as NODE which are no longer needed
6349 because of new clones and will be removed as unreachable. */
6351 static void
6352 identify_dead_nodes (struct cgraph_node *node)
6354 struct cgraph_node *v;
6355 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6356 if (v->local)
6358 ipa_node_params *info = ipa_node_params_sum->get (v);
6359 if (info
6360 && !v->call_for_symbol_thunks_and_aliases
6361 (has_undead_caller_from_outside_scc_p, NULL, true))
6362 info->node_dead = 1;
6365 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6367 ipa_node_params *info = ipa_node_params_sum->get (v);
6368 if (info && !info->node_dead)
6369 spread_undeadness (v);
6372 if (dump_file && (dump_flags & TDF_DETAILS))
6374 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6375 if (ipa_node_params_sum->get (v)
6376 && ipa_node_params_sum->get (v)->node_dead)
6377 fprintf (dump_file, " Marking node as dead: %s.\n",
6378 v->dump_name ());
6382 /* The decision stage. Iterate over the topological order of call graph nodes
6383 TOPO and make specialized clones if deemed beneficial. */
6385 static void
6386 ipcp_decision_stage (class ipa_topo_info *topo)
6388 int i;
6390 if (dump_file)
6391 fprintf (dump_file, "\nIPA decision stage:\n\n");
6393 for (i = topo->nnodes - 1; i >= 0; i--)
6395 struct cgraph_node *node = topo->order[i];
6396 bool change = false, iterate = true;
6398 while (iterate)
6400 struct cgraph_node *v;
6401 iterate = false;
6402 for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
6403 if (v->has_gimple_body_p ()
6404 && ipcp_versionable_function_p (v))
6405 iterate |= decide_whether_version_node (v);
6407 change |= iterate;
6409 if (change)
6410 identify_dead_nodes (node);
6414 /* Look up all the bits information that we have discovered and copy it over
6415 to the transformation summary. */
6417 static void
6418 ipcp_store_bits_results (void)
6420 cgraph_node *node;
6422 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6424 ipa_node_params *info = ipa_node_params_sum->get (node);
6425 bool dumped_sth = false;
6426 bool found_useful_result = false;
6428 if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info)
6430 if (dump_file)
6431 fprintf (dump_file, "Not considering %s for ipa bitwise propagation"
6432 "; -fipa-bit-cp: disabled.\n",
6433 node->dump_name ());
6434 continue;
6437 if (info->ipcp_orig_node)
6438 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6439 if (!info->lattices)
6440 /* Newly expanded artificial thunks do not have lattices. */
6441 continue;
6443 unsigned count = ipa_get_param_count (info);
6444 for (unsigned i = 0; i < count; i++)
6446 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6447 if (plats->bits_lattice.constant_p ())
6449 found_useful_result = true;
6450 break;
6454 if (!found_useful_result)
6455 continue;
6457 ipcp_transformation_initialize ();
6458 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6459 vec_safe_reserve_exact (ts->bits, count);
6461 for (unsigned i = 0; i < count; i++)
6463 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6464 ipa_bits *jfbits;
6466 if (plats->bits_lattice.constant_p ())
6468 jfbits
6469 = ipa_get_ipa_bits_for_value (plats->bits_lattice.get_value (),
6470 plats->bits_lattice.get_mask ());
6471 if (!dbg_cnt (ipa_cp_bits))
6472 jfbits = NULL;
6474 else
6475 jfbits = NULL;
6477 ts->bits->quick_push (jfbits);
6478 if (!dump_file || !jfbits)
6479 continue;
6480 if (!dumped_sth)
6482 fprintf (dump_file, "Propagated bits info for function %s:\n",
6483 node->dump_name ());
6484 dumped_sth = true;
6486 fprintf (dump_file, " param %i: value = ", i);
6487 print_hex (jfbits->value, dump_file);
6488 fprintf (dump_file, ", mask = ");
6489 print_hex (jfbits->mask, dump_file);
6490 fprintf (dump_file, "\n");
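/* A hypothetical example of the bits stored above (consume is made up): if
   the only calls are

     void consume (void *p);
     consume (aligned_alloc (16, n));
     consume (aligned_alloc (32, m));

   then the bits lattice for P knows that the low four bits of the pointer
   are zero: the recorded value has those bits clear and so does the mask, a
   set mask bit meaning the bit is unknown.  */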
6495 /* Look up all VR information that we have discovered and copy it over
6496 to the transformation summary. */
6498 static void
6499 ipcp_store_vr_results (void)
6501 cgraph_node *node;
6503 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6505 ipa_node_params *info = ipa_node_params_sum->get (node);
6506 bool found_useful_result = false;
6508 if (!info || !opt_for_fn (node->decl, flag_ipa_vrp))
6510 if (dump_file)
6511 fprintf (dump_file, "Not considering %s for VR discovery "
6512 "and propagate; -fipa-ipa-vrp: disabled.\n",
6513 node->dump_name ());
6514 continue;
6517 if (info->ipcp_orig_node)
6518 info = ipa_node_params_sum->get (info->ipcp_orig_node);
6519 if (!info->lattices)
6520 /* Newly expanded artificial thunks do not have lattices. */
6521 continue;
6523 unsigned count = ipa_get_param_count (info);
6524 for (unsigned i = 0; i < count; i++)
6526 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6527 if (!plats->m_value_range.bottom_p ()
6528 && !plats->m_value_range.top_p ())
6530 found_useful_result = true;
6531 break;
6534 if (!found_useful_result)
6535 continue;
6537 ipcp_transformation_initialize ();
6538 ipcp_transformation *ts = ipcp_transformation_sum->get_create (node);
6539 vec_safe_reserve_exact (ts->m_vr, count);
6541 for (unsigned i = 0; i < count; i++)
6543 ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
6544 ipa_vr vr;
6546 if (!plats->m_value_range.bottom_p ()
6547 && !plats->m_value_range.top_p ()
6548 && dbg_cnt (ipa_cp_vr))
6550 vr.known = true;
6551 vr.type = plats->m_value_range.m_vr.kind ();
6552 vr.min = wi::to_wide (plats->m_value_range.m_vr.min ());
6553 vr.max = wi::to_wide (plats->m_value_range.m_vr.max ());
6555 else
6557 vr.known = false;
6558 vr.type = VR_VARYING;
6559 vr.min = vr.max = wi::zero (INT_TYPE_SIZE);
6561 ts->m_vr->quick_push (vr);
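/* For instance, in the hypothetical

     static void f (int x);
     void a (void) { f (1); }
     void b (void) { f (100); }

   the value range lattice of X ends up as [1, 100], which is neither bottom
   nor top, so vr.known is set and the pair min = 1, max = 100 is streamed
   into the transformation summary, where the intraprocedural passes can
   later pick it up.  */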
6566 /* The IPCP driver. */
6568 static unsigned int
6569 ipcp_driver (void)
6571 class ipa_topo_info topo;
6573 if (edge_clone_summaries == NULL)
6574 edge_clone_summaries = new edge_clone_summary_t (symtab);
6576 ipa_check_create_node_params ();
6577 ipa_check_create_edge_args ();
6578 clone_num_suffixes = new hash_map<const char *, unsigned>;
6580 if (dump_file)
6582 fprintf (dump_file, "\nIPA structures before propagation:\n");
6583 if (dump_flags & TDF_DETAILS)
6584 ipa_print_all_params (dump_file);
6585 ipa_print_all_jump_functions (dump_file);
6588 /* Topological sort. */
6589 build_toporder_info (&topo);
6590 /* Do the interprocedural propagation. */
6591 ipcp_propagate_stage (&topo);
6592 /* Decide what constant propagation and cloning should be performed. */
6593 ipcp_decision_stage (&topo);
6594 /* Store results of bits propagation. */
6595 ipcp_store_bits_results ();
6596 /* Store results of value range propagation. */
6597 ipcp_store_vr_results ();
6599 /* Free all IPCP structures. */
6600 delete clone_num_suffixes;
6601 free_toporder_info (&topo);
6602 delete edge_clone_summaries;
6603 edge_clone_summaries = NULL;
6604 ipa_free_all_structures_after_ipa_cp ();
6605 if (dump_file)
6606 fprintf (dump_file, "\nIPA constant propagation end\n");
6607 return 0;
6610 /* Initialization and computation of IPCP data structures. This is the initial
6611 intraprocedural analysis of functions, which gathers information to be
6612 propagated later on. */
6614 static void
6615 ipcp_generate_summary (void)
6617 struct cgraph_node *node;
6619 if (dump_file)
6620 fprintf (dump_file, "\nIPA constant propagation start:\n");
6621 ipa_register_cgraph_hooks ();
6623 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
6624 ipa_analyze_node (node);
6627 namespace {
6629 const pass_data pass_data_ipa_cp =
6631 IPA_PASS, /* type */
6632 "cp", /* name */
6633 OPTGROUP_NONE, /* optinfo_flags */
6634 TV_IPA_CONSTANT_PROP, /* tv_id */
6635 0, /* properties_required */
6636 0, /* properties_provided */
6637 0, /* properties_destroyed */
6638 0, /* todo_flags_start */
6639 ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
6642 class pass_ipa_cp : public ipa_opt_pass_d
6644 public:
6645 pass_ipa_cp (gcc::context *ctxt)
6646 : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
6647 ipcp_generate_summary, /* generate_summary */
6648 NULL, /* write_summary */
6649 NULL, /* read_summary */
6650 ipcp_write_transformation_summaries, /*
6651 write_optimization_summary */
6652 ipcp_read_transformation_summaries, /*
6653 read_optimization_summary */
6654 NULL, /* stmt_fixup */
6655 0, /* function_transform_todo_flags_start */
6656 ipcp_transform_function, /* function_transform */
6657 NULL) /* variable_transform */
6660 /* opt_pass methods: */
6661 virtual bool gate (function *)
6663 /* FIXME: We should remove the optimize check after we ensure we never run
6664 IPA passes when not optimizing. */
6665 return (flag_ipa_cp && optimize) || in_lto_p;
6668 virtual unsigned int execute (function *) { return ipcp_driver (); }
6670 }; // class pass_ipa_cp
6672 } // anon namespace
6674 ipa_opt_pass_d *
6675 make_pass_ipa_cp (gcc::context *ctxt)
6677 return new pass_ipa_cp (ctxt);
6680 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6681 within the same process. For use by toplev::finalize. */
6683 void
6684 ipa_cp_cc_finalize (void)
6686 base_count = profile_count::uninitialized ();
6687 overall_size = 0;
6688 orig_overall_size = 0;
6689 ipcp_free_transformation_sum ();