1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2022 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
105 #include "coretypes.h"
108 #include "gimple-expr.h"
111 #include "alloc-pool.h"
112 #include "tree-pass.h"
114 #include "diagnostic.h"
115 #include "fold-const.h"
116 #include "gimple-fold.h"
117 #include "symbol-summary.h"
118 #include "tree-vrp.h"
119 #include "ipa-prop.h"
120 #include "tree-pretty-print.h"
121 #include "tree-inline.h"
122 #include "ipa-fnsummary.h"
123 #include "ipa-utils.h"
124 #include "tree-ssa-ccp.h"
125 #include "stringpool.h"
128 #include "symtab-clones.h"
/* Forward declaration: ipcp_value_source below needs to refer to it.  */
template <typename valtype>
class ipcp_value;
/* Describes a particular source for an IPA-CP value.  */

template <typename valtype>
struct ipcp_value_source
  /* Aggregate offset of the source, negative if the source is scalar value of
     the argument itself.  */
  HOST_WIDE_INT offset;
  /* The incoming edge that brought the value.  */
  /* NOTE(review): the edge member declaration itself appears to have been
     dropped in extraction here -- confirm against the original file.  */
  /* If the jump function that resulted into this value was a pass-through or an
     ancestor, this is the ipcp_value of the caller from which the described
     value has been derived.  Otherwise it is NULL.  */
  ipcp_value<valtype> *val;
  /* Next pointer in a linked list of sources of a value.  */
  ipcp_value_source *next;
  /* If the jump function that resulted into this value was a pass-through or an
     ancestor, this is the index of the parameter of the caller the jump
     function references.  NOTE(review): the member declaration and the closing
     of the struct were dropped by extraction.  */
/* Common ancestor for all ipcp_value instantiations.  */

class ipcp_value_base
  /* Time benefit that specializing the function for this value would bring
     about in this function alone.  (Comment wording was garbled in
     extraction.)  */
  sreal local_time_benefit;
  /* Time benefit that specializing the function for this value can bring about
     in total (comment truncated in extraction).  */
  sreal prop_time_benefit;
  /* Size cost that specializing the function for this value would bring about
     in this function alone.  NOTE(review): the member declaration was dropped
     by extraction; the constructor below shows it is local_size_cost.  */
  /* Size cost that specializing the function for this value can bring about in
     total (comment truncated; member prop_size_cost dropped by extraction).  */
  : local_time_benefit (0), prop_time_benefit (0),
    local_size_cost (0), prop_size_cost (0) {}
/* Describes one particular value stored in struct ipcp_lattice.  */

template <typename valtype>
class ipcp_value : public ipcp_value_base
  /* The actual value for the given parameter.  NOTE(review): the member
     declaration itself was dropped by extraction.  */
  /* The list of sources from which this value originates.  */
  ipcp_value_source<valtype> *sources = nullptr;
  /* Next pointers in a linked list of all values in a lattice.  */
  ipcp_value *next = nullptr;
  /* Next pointers in a linked list of values in a strongly connected component
     (comment truncated in extraction).  */
  ipcp_value *scc_next = nullptr;
  /* Next pointers in a linked list of SCCs of values sorted topologically
     according their sources.  */
  ipcp_value *topo_next = nullptr;
  /* A specialized node created for this value, NULL if none has been (so far)
     created (comment truncated in extraction).  */
  cgraph_node *spec_node = nullptr;
  /* Depth first search number and low link for topological sorting of
     values.  NOTE(review): the integer member declarations were dropped by
     extraction.  */
  /* SCC number to identify values which recursively feed into each other.
     Values in the same SCC have the same SCC number.  NOTE(review): member
     declaration dropped; same_scc below shows it is scc_no.  */
  /* Non zero if the value is generated from another value in the same lattice
     for a self-recursive call, the actual number is how many times the
     operation has been performed.  In the unlikely event of the value being
     present in two chains of self-recursive value generation chains, it is the
     maximum (comment truncated in extraction).  */
  unsigned self_recursion_generated_level = 0;
  /* True if this value is currently on the topo-sort stack.  */
  bool on_stack = false;

  /* Record that this value arrived over edge CS, derived from value SRC_VAL of
     parameter SRC_IDX at aggregate offset OFFSET.  */
  void add_source (cgraph_edge *cs, ipcp_value *src_val, int src_idx,
		   HOST_WIDE_INT offset);

  /* Return true if both THIS value and O feed into each other.  */

  bool same_scc (const ipcp_value<valtype> *o)
    return o->scc_no == scc_no;

  /* Return true, if this value has been generated for a self-recursive call as
     a result of an arithmetic pass-through jump-function acting on a value in
     the same lattice function.  */

  bool self_recursion_generated_p ()
    return self_recursion_generated_level > 0;
/* Lattice describing potential values of a formal parameter of a function, or
   a part of an aggregate.  TOP is represented by a lattice with zero values
   and with contains_variable and bottom flags cleared.  BOTTOM is represented
   by a lattice with the bottom flag set.  In that case, values and
   contains_variable flag should be disregarded.  */

template <typename valtype>
/* NOTE(review): the "class ipcp_lattice" header line itself was dropped by
   extraction; the members below clearly belong to it.  */
  /* The list of known values and types in this lattice.  Note that values are
     not deallocated if a lattice is set to bottom because there may be value
     sources referencing them.  */
  ipcp_value<valtype> *values;
  /* Number of known values and types in this lattice.  NOTE(review): member
     declaration dropped; is_single_const below shows it is values_count.  */
  /* The lattice contains a variable component (in addition to values).  */
  bool contains_variable;
  /* The value of the lattice is bottom (i.e. variable and unusable for any
     propagation).  NOTE(review): member declaration dropped by extraction.  */

  inline bool is_single_const ();
  inline bool set_to_bottom ();
  inline bool set_contains_variable ();
  bool add_value (valtype newval, cgraph_edge *cs,
		  ipcp_value<valtype> *src_val = NULL,
		  int src_idx = 0, HOST_WIDE_INT offset = -1,
		  ipcp_value<valtype> **val_p = NULL,
		  unsigned same_lat_gen_level = 0);
  void print (FILE * f, bool dump_sources, bool dump_benefits);
/* Lattice of tree values with an offset to describe a part of an
   aggregate.  */

struct ipcp_agg_lattice : public ipcp_lattice<tree>
  /* Offset that is being described by this lattice.  */
  HOST_WIDE_INT offset;
  /* Size so that we don't have to re-compute it every time we traverse the
     list.  Must correspond to TYPE_SIZE of all lat values.  NOTE(review): the
     member declaration itself was dropped by extraction.  */
  /* Next element of the linked list.  */
  struct ipcp_agg_lattice *next;
/* Lattice of known bits, only capable of holding one value.
   Bitwise constant propagation propagates which bits of a
   value are constant.  NOTE(review): the worked example that originally
   accompanied this comment was dropped by extraction.
   In the above case, the param 'x' will always have all
   the bits (except the bits in lsb) set to 0.
   Hence the mask of 'x' would be 0xff.  The mask
   reflects that the bits in lsb are unknown.
   The actual propagated value is given by m_value & ~m_mask.  */

class ipcp_bits_lattice
  /* Lattice state queries: VARYING is bottom, UNDEFINED is top.  */
  bool bottom_p () const { return m_lattice_val == IPA_BITS_VARYING; }
  bool top_p () const { return m_lattice_val == IPA_BITS_UNDEFINED; }
  bool constant_p () const { return m_lattice_val == IPA_BITS_CONSTANT; }
  bool set_to_bottom ();
  bool set_to_constant (widest_int, widest_int);
  bool known_nonzero_p () const;

  widest_int get_value () const { return m_value; }
  widest_int get_mask () const { return m_mask; }

  /* Meet with another lattice, optionally through an arithmetic operation.  */
  bool meet_with (ipcp_bits_lattice& other, unsigned, signop,
		  enum tree_code, tree, bool);

  /* Meet with a constant <value, mask> pair of the given precision.  */
  bool meet_with (widest_int, widest_int, unsigned);

  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } m_lattice_val;

  /* Similar to ccp_lattice_t, mask represents which bits of value are constant.
     If a bit in mask is set to 0, then the corresponding bit in
     value is known to be constant.  */
  widest_int m_value, m_mask;

  bool meet_with_1 (widest_int, widest_int, unsigned, bool);
  void get_value_and_mask (tree, widest_int *, widest_int *);
/* Lattice of value ranges.  */

class ipcp_vr_lattice
  /* NOTE(review): the m_vr member declaration was dropped by extraction; the
     methods below clearly operate on it.  */
  inline bool bottom_p () const;
  inline bool top_p () const;
  inline bool set_to_bottom ();
  bool meet_with (const value_range *p_vr);
  bool meet_with (const ipcp_vr_lattice &other);
  /* A freshly initialized lattice must be in the undefined (TOP) state.  */
  void init () { gcc_assert (m_vr.undefined_p ()); }
  void print (FILE * f);

  bool meet_with_1 (const value_range *other_vr);
/* Structure containing lattices for a parameter itself and for pieces of
   aggregates that are passed in the parameter or by a reference in a parameter
   plus some other useful flags.  */

class ipcp_param_lattices
  /* Lattice describing the value of the parameter itself.  */
  ipcp_lattice<tree> itself;
  /* Lattice describing the polymorphic contexts of a parameter.  */
  ipcp_lattice<ipa_polymorphic_call_context> ctxlat;
  /* Lattices describing aggregate parts.  */
  ipcp_agg_lattice *aggs;
  /* Lattice describing known bits.  */
  ipcp_bits_lattice bits_lattice;
  /* Lattice describing value range.  */
  ipcp_vr_lattice m_value_range;
  /* Number of aggregate lattices.  NOTE(review): member declaration dropped
     by extraction.  */
  /* True if aggregate data were passed by reference (as opposed to by
     value).  NOTE(review): member declaration dropped; print_all_lattices
     shows it is aggs_by_ref.  */
  /* All aggregate lattices contain a variable component (in addition to
     values).  */
  bool aggs_contain_variable;
  /* The value of all aggregate lattices is bottom (i.e. variable and unusable
     for any propagation).  NOTE(review): member declaration dropped;
     print_all_lattices shows it is aggs_bottom.  */
  /* There is a virtual call based on this parameter.  NOTE(review): member
     declaration dropped; print_all_lattices shows it is virt_call.  */
390 /* Allocation pools for values and their sources in ipa-cp. */
392 object_allocator
<ipcp_value
<tree
> > ipcp_cst_values_pool
393 ("IPA-CP constant values");
395 object_allocator
<ipcp_value
<ipa_polymorphic_call_context
> >
396 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
398 object_allocator
<ipcp_value_source
<tree
> > ipcp_sources_pool
399 ("IPA-CP value sources");
401 object_allocator
<ipcp_agg_lattice
> ipcp_agg_lattice_pool
402 ("IPA_CP aggregate lattices");
404 /* Base count to use in heuristics when using profile feedback. */
406 static profile_count base_count
;
408 /* Original overall size of the program. */
410 static long overall_size
, orig_overall_size
;
412 /* Node name to unique clone suffix number map. */
413 static hash_map
<const char *, unsigned> *clone_num_suffixes
;
415 /* Return the param lattices structure corresponding to the Ith formal
416 parameter of the function described by INFO. */
417 static inline class ipcp_param_lattices
*
418 ipa_get_parm_lattices (class ipa_node_params
*info
, int i
)
420 gcc_assert (i
>= 0 && i
< ipa_get_param_count (info
));
421 gcc_checking_assert (!info
->ipcp_orig_node
);
422 gcc_checking_assert (info
->lattices
);
423 return &(info
->lattices
[i
]);
426 /* Return the lattice corresponding to the scalar value of the Ith formal
427 parameter of the function described by INFO. */
428 static inline ipcp_lattice
<tree
> *
429 ipa_get_scalar_lat (class ipa_node_params
*info
, int i
)
431 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
432 return &plats
->itself
;
435 /* Return the lattice corresponding to the scalar value of the Ith formal
436 parameter of the function described by INFO. */
437 static inline ipcp_lattice
<ipa_polymorphic_call_context
> *
438 ipa_get_poly_ctx_lat (class ipa_node_params
*info
, int i
)
440 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
441 return &plats
->ctxlat
;
/* Return whether LAT is a lattice with a single constant and without an
   undefined value (comment truncated in extraction).  */

template <typename valtype>
ipcp_lattice<valtype>::is_single_const ()
  /* A single-const lattice is not bottom, has no variable component and holds
     exactly one value.  NOTE(review): the return-type line, braces and return
     statements were dropped by extraction.  */
  if (bottom || contains_variable || values_count != 1)
/* Print V which is extracted from a value in a lattice to F.  */

print_ipcp_constant_value (FILE * f, tree v)
  /* For the address of a CONST_DECL, print the underlying constant
     initializer rather than the bare address.  NOTE(review): the return-type
     line, braces and the else-branch structure were dropped by extraction.  */
  if (TREE_CODE (v) == ADDR_EXPR
      && TREE_CODE (TREE_OPERAND (v, 0)) == CONST_DECL)
      print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (v, 0)));
  print_generic_expr (f, v);
/* Print V which is extracted from a value in a lattice to F.  */

print_ipcp_constant_value (FILE * f, ipa_polymorphic_call_context v)
  /* NOTE(review): the return-type line, braces and the whole body of this
     overload were dropped by extraction -- consult the original file.  */
/* Print a lattice LAT to F.  */

template <typename valtype>
ipcp_lattice<valtype>::print (FILE * f, bool dump_sources, bool dump_benefits)
  /* NOTE(review): the return-type line, braces, early returns, the PREV
     tracking variable and several separator fprintf calls were dropped by
     extraction.  */
  ipcp_value<valtype> *val;

  /* BOTTOM, TOP and VARIABLE states are printed symbolically.  */
  fprintf (f, "BOTTOM\n");

  if (!values_count && !contains_variable)
      fprintf (f, "TOP\n");

  if (contains_variable)
      fprintf (f, "VARIABLE");

  /* Walk the linked list of values, printing each constant, optionally its
     sources, and optionally its local/propagated benefit estimates.  */
  for (val = values; val; val = val->next)
      if (dump_benefits && prev)
      else if (!dump_benefits && prev)

      print_ipcp_constant_value (f, val->value);

	  ipcp_value_source<valtype> *s;

	  if (val->self_recursion_generated_p ())
	    fprintf (f, " [self_gen(%i), from:",
		     val->self_recursion_generated_level);
	  fprintf (f, " [scc: %i, from:", val->scc_no);
	  for (s = val->sources; s; s = s->next)
	    fprintf (f, " %i(%f)", s->cs->caller->order,
		     s->cs->sreal_frequency ().to_double ());

	fprintf (f, " [loc_time: %g, loc_size: %i, "
		 "prop_time: %g, prop_size: %i]\n",
		 val->local_time_benefit.to_double (), val->local_size_cost,
		 val->prop_time_benefit.to_double (), val->prop_size_cost);
/* Print the known-bits lattice to F.  NOTE(review): the return-type line,
   braces and the leading "if (top_p ())" guard were dropped by extraction.  */
ipcp_bits_lattice::print (FILE *f)
    fprintf (f, " Bits unknown (TOP)\n");
  else if (bottom_p ())
    fprintf (f, " Bits unusable (BOTTOM)\n");
      /* Constant state: dump the value and the unknown-bits mask in hex.  */
      fprintf (f, " Bits: value = "); print_hex (get_value (), f);
      fprintf (f, ", mask = "); print_hex (get_mask (), f);
560 /* Print value range lattice to F. */
563 ipcp_vr_lattice::print (FILE * f
)
565 dump_value_range (f
, &m_vr
);
/* Print all ipcp_lattices of all functions to F.  */

print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
  /* NOTE(review): the return-type line, braces, the declarations of I and
     COUNT, a "continue;" and some separator fprintf calls were dropped by
     extraction.  */
  struct cgraph_node *node;

  fprintf (f, "\nLattices:\n");
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
      class ipa_node_params *info;

      info = ipa_node_params_sum->get (node);
      /* Skip unoptimized functions and constprop clones since we don't make
	 lattices for them.  */
      if (!info || info->ipcp_orig_node)
	/* NOTE(review): the "continue;" belonging to this guard was dropped
	   by extraction.  */
      fprintf (f, " Node: %s:\n", node->dump_name ());
      count = ipa_get_param_count (info);
      /* For each formal parameter dump the scalar, context, bits and
	 value-range lattices, plus the per-parameter flags.  */
      for (i = 0; i < count; i++)
	  struct ipcp_agg_lattice *aglat;
	  class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
	  fprintf (f, " param [%d]: ", i);
	  plats->itself.print (f, dump_sources, dump_benefits);
	  fprintf (f, " ctxs: ");
	  plats->ctxlat.print (f, dump_sources, dump_benefits);
	  plats->bits_lattice.print (f);
	  plats->m_value_range.print (f);

	  if (plats->virt_call)
	    fprintf (f, " virt_call flag set\n");

	  if (plats->aggs_bottom)
	      fprintf (f, " AGGS BOTTOM\n");
	  if (plats->aggs_contain_variable)
	    fprintf (f, " AGGS VARIABLE\n");
	  /* Finally walk the list of aggregate-part lattices.  */
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	      fprintf (f, " %soffset " HOST_WIDE_INT_PRINT_DEC ": ",
		       plats->aggs_by_ref ? "ref " : "", aglat->offset);
	      aglat->print (f, dump_sources, dump_benefits);
/* Determine whether it is at all technically possible to create clones of NODE
   and store this information in the ipa_node_params structure associated
   with it (comment truncated in extraction).  */

determine_versionability (struct cgraph_node *node,
			  class ipa_node_params *info)
  /* NOTE(review): the return-type line and braces were dropped by
     extraction.  */
  const char *reason = NULL;

  /* There are a number of generic reasons functions cannot be versioned.  We
     also cannot remove parameters if there are type attributes such as fnspec
     present (comment truncated in extraction).  */
  if (node->alias || node->thunk)
    reason = "alias or thunk";
  else if (!node->versionable)
    reason = "not a tree_versionable_function";
  else if (node->get_availability () <= AVAIL_INTERPOSABLE)
    reason = "insufficient body availability";
  else if (!opt_for_fn (node->decl, optimize)
	   || !opt_for_fn (node->decl, flag_ipa_cp))
    reason = "non-optimized function";
  else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node->decl)))
      /* Ideally we should clone the SIMD clones themselves and create
	 vector copies of them, so IPA-cp and SIMD clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function has SIMD clones";
  else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node->decl)))
      /* Ideally we should clone the target clones themselves and create
	 copies of them, so IPA-cp and target clones can happily
	 coexist, but that may not be worth the effort.  */
      reason = "function target_clones attribute";
  /* Don't clone decls local to a comdat group; it breaks and for C++
     decloned constructors, inlining is always better anyway.  */
  else if (node->comdat_local_p ())
    reason = "comdat-local function";
  else if (node->calls_comdat_local)
      /* TODO: call is versionable if we make sure that all
	 callers are inside of a comdat group.  */
      reason = "calls comdat-local function";

  /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
     work only when inlined.  Cloning them may still lead to better code
     because ipa-cp will not give up on cloning further.  If the function is
     external this however leads to wrong code because we may end up producing
     offline copy of the function.  */
  if (DECL_EXTERNAL (node->decl))
    for (cgraph_edge *edge = node->callees; !reason && edge;
	 edge = edge->next_callee)
      if (fndecl_built_in_p (edge->callee->decl, BUILT_IN_NORMAL))
	  if (DECL_FUNCTION_CODE (edge->callee->decl) == BUILT_IN_VA_ARG_PACK)
	    reason = "external function which calls va_arg_pack";
	  if (DECL_FUNCTION_CODE (edge->callee->decl)
	      == BUILT_IN_VA_ARG_PACK_LEN)
	    reason = "external function which calls va_arg_pack_len";

  /* Aliases and thunks were already rejected above, so stay quiet about them
     in the dump.  */
  if (reason && dump_file && !node->alias && !node->thunk)
    fprintf (dump_file, "Function %s is not versionable, reason: %s.\n",
	     node->dump_name (), reason);

  info->versionable = (reason == NULL);
691 /* Return true if it is at all technically possible to create clones of a
695 ipcp_versionable_function_p (struct cgraph_node
*node
)
697 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
698 return info
&& info
->versionable
;
/* Structure holding accumulated information about callers of a node.  */

struct caller_statistics
  /* If requested (see below), self-recursive call counts are summed into this
     field.  */
  profile_count rec_count_sum;
  /* The sum of all ipa counts of all the other (non-recursive) calls.  */
  profile_count count_sum;
  /* Sum of all frequencies for all calls.  NOTE(review): member declaration
     dropped; gather_caller_stats shows it is freq_sum.  */
  /* Number of calls and hot calls respectively.  */
  int n_calls, n_hot_calls;
  /* If itself is set up, also count the number of non-self-recursive
     calls (comment truncated; member n_nonrec_calls dropped by
     extraction).  */
  /* If non-NULL, this is the node itself and calls from it should have their
     counts included in rec_count_sum and not count_sum.  NOTE(review): the
     member declaration ("itself") was dropped by extraction.  */
/* Initialize fields of STAT to zeroes and optionally set it up so that edges
   from IGNORED_CALLER are not counted.  NOTE(review): the comment's
   IGNORED_CALLER does not match the parameter name ITSELF below -- confirm
   against the original file.  */

init_caller_stats (caller_statistics *stats, cgraph_node *itself = NULL)
  /* NOTE(review): the return-type line, braces and the zeroing of freq_sum
     and n_calls were dropped by extraction.  */
  stats->rec_count_sum = profile_count::zero ();
  stats->count_sum = profile_count::zero ();
  stats->n_hot_calls = 0;
  stats->n_nonrec_calls = 0;
  stats->itself = itself;
/* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
   non-thunk incoming edges to NODE.  */

gather_caller_stats (struct cgraph_node *node, void *data)
  /* NOTE(review): the return-type line, braces, a "continue;", an "else", the
     n_calls increment and the final return were dropped by extraction.  */
  struct caller_statistics *stats = (struct caller_statistics *) data;
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    if (!cs->caller->thunk)
	/* Edges from dead nodes are skipped entirely.  */
	ipa_node_params *info = ipa_node_params_sum->get (cs->caller);
	if (info && info->node_dead)

	/* Self-recursive edges are summed separately when stats->itself is
	   set up; all other counted edges go into count_sum.  */
	if (cs->count.ipa ().initialized_p ())
	    if (stats->itself && stats->itself == cs->caller)
	      stats->rec_count_sum += cs->count.ipa ();
	      stats->count_sum += cs->count.ipa ();
	stats->freq_sum += cs->sreal_frequency ();

	if (stats->itself && stats->itself != cs->caller)
	  stats->n_nonrec_calls++;

	if (cs->maybe_hot_p ())
	  stats->n_hot_calls++;
/* Return true if this NODE is viable candidate for cloning.  */

ipcp_cloning_candidate_p (struct cgraph_node *node)
  /* NOTE(review): the return-type line, braces, all return statements, the
     dump_file guards and the node->dump_name () arguments of the fprintf
     calls were dropped by extraction.  */
  struct caller_statistics stats;

  gcc_checking_assert (node->has_gimple_body_p ());

  /* Cloning is explicitly disabled.  */
  if (!opt_for_fn (node->decl, flag_ipa_cp_clone))
      fprintf (dump_file, "Not considering %s for cloning; "
	       "-fipa-cp-clone disabled.\n",

  /* Never grow code of functions optimized for size.  */
  if (node->optimize_for_size_p ())
      fprintf (dump_file, "Not considering %s for cloning; "
	       "optimizing it for size.\n",

  init_caller_stats (&stats);
  node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false);

  /* If the function body is smaller than the accumulated number of calls,
     cloning may actually shrink overall code.  */
  if (ipa_size_summaries->get (node)->self_size < stats.n_calls)
      fprintf (dump_file, "Considering %s for cloning; code might shrink.\n",

  /* When profile is available and function is hot, propagate into it even if
     calls seems cold; constant propagation can improve function's speed
     significantly (comment truncated in extraction).  */
  if (stats.count_sum > profile_count::zero ()
      && node->count.ipa ().initialized_p ())
      /* Most of the execution count arriving via direct calls qualifies the
	 node.  */
      if (stats.count_sum > node->count.ipa ().apply_scale (90, 100))
	  fprintf (dump_file, "Considering %s for cloning; "
		   "usually called directly.\n",
      if (!stats.n_hot_calls)
	  fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
  fprintf (dump_file, "Considering %s for cloning.\n",
/* Bookkeeping for a topological sort of ipcp_value SCCs of one value type.  */
template <typename valtype>
class value_topo_info
  /* Head of the linked list of topologically sorted values.  */
  ipcp_value<valtype> *values_topo;
  /* Stack for creating SCCs, represented by a linked list too.  */
  ipcp_value<valtype> *stack;
  /* Counter driving the algorithm in add_val_to_toposort.  NOTE(review): the
     member declaration was dropped; the constructor below shows it is
     dfs_counter.  */

  value_topo_info () : values_topo (NULL), stack (NULL), dfs_counter (0)
  /* NOTE(review): the constructor body braces were dropped by extraction.  */
  void add_val (ipcp_value<valtype> *cur_val);
  void propagate_effects ();
/* Arrays representing a topological ordering of call graph nodes and a stack
   of nodes used during constant propagation and also data required to perform
   topological sort of values and propagation of benefits in the determined
   order (comment truncated in extraction).  */

/* NOTE(review): the "class ipa_topo_info" header line and the tail of the
   constructor initializer list were dropped by extraction.  */
  /* Array with obtained topological order of cgraph nodes.  */
  struct cgraph_node **order;
  /* Stack of cgraph nodes used during propagation within SCC until all values
     in the SCC stabilize.  */
  struct cgraph_node **stack;
  int nnodes, stack_top;

  /* Per-value-type topological sort state for constants and contexts.  */
  value_topo_info<tree> constants;
  value_topo_info<ipa_polymorphic_call_context> contexts;

  ipa_topo_info () : order(NULL), stack(NULL), nnodes(0), stack_top(0),
878 /* Skip edges from and to nodes without ipa_cp enabled.
879 Ignore not available symbols. */
882 ignore_edge_p (cgraph_edge
*e
)
884 enum availability avail
;
885 cgraph_node
*ultimate_target
886 = e
->callee
->function_or_virtual_thunk_symbol (&avail
, e
->caller
);
888 return (avail
<= AVAIL_INTERPOSABLE
889 || !opt_for_fn (ultimate_target
->decl
, optimize
)
890 || !opt_for_fn (ultimate_target
->decl
, flag_ipa_cp
));
/* Allocate the arrays in TOPO and topologically sort the nodes into order.  */

build_toporder_info (class ipa_topo_info *topo)
  /* NOTE(review): the return-type line, braces and the trailing arguments of
     the ipa_reduced_postorder call (presumably the edge-ignoring predicate)
     were dropped by extraction.  */
  topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);

  gcc_checking_assert (topo->stack_top == 0);
  topo->nnodes = ipa_reduced_postorder (topo->order, true,
/* Free information about strongly connected components and the arrays in
   TOPO (comment truncated in extraction).  */

free_toporder_info (class ipa_topo_info *topo)
  /* NOTE(review): the return-type line, braces and the freeing of the order
     and stack arrays were dropped by extraction.  */
  ipa_free_postorder_info ();
/* Add NODE to the stack in TOPO, unless it is already there.  */

push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node)
  /* NOTE(review): the return-type line, braces and the early "return;" for
     already-enqueued nodes were dropped by extraction.  */
  ipa_node_params *info = ipa_node_params_sum->get (node);
  if (info->node_enqueued)
  /* Mark the node as enqueued so it is not pushed twice.  */
  info->node_enqueued = 1;
  topo->stack[topo->stack_top++] = node;
/* Pop a node from the stack in TOPO and return it or return NULL if the stack
   is empty (comment truncated in extraction).  */

static struct cgraph_node *
pop_node_from_stack (class ipa_topo_info *topo)
  /* NOTE(review): the braces, the empty-stack check, the stack_top decrement
     and the return statements were dropped by extraction.  */
  struct cgraph_node *node;

  node = topo->stack[topo->stack_top];
  /* Clear the enqueued flag so the node can be pushed again later.  */
  ipa_node_params_sum->get (node)->node_enqueued = 0;
/* Set lattice LAT to bottom and return true if it previously was not set as
   such (comment truncated in extraction).  */

template <typename valtype>
ipcp_lattice<valtype>::set_to_bottom ()
  /* NOTE(review): the return-type line and the entire body (setting the
     bottom flag and returning whether it changed) were dropped by
     extraction.  */
/* Mark lattice as containing an unknown value and return true if it previously
   was not marked as such.  */

template <typename valtype>
ipcp_lattice<valtype>::set_contains_variable ()
  /* NOTE(review): the return-type line, braces and the final "return ret;"
     were dropped by extraction.  */
  bool ret = !contains_variable;
  contains_variable = true;
/* Set all aggregate lattices in PLATS to bottom and return true if they were
   not previously set as such.  */

set_agg_lats_to_bottom (class ipcp_param_lattices *plats)
  /* NOTE(review): the return-type line, braces and the final "return ret;"
     were dropped by extraction.  */
  bool ret = !plats->aggs_bottom;
  plats->aggs_bottom = true;
/* Mark all aggregate lattices in PLATS as containing an unknown value and
   return true if they were not previously marked as such.  */

set_agg_lats_contain_variable (class ipcp_param_lattices *plats)
  /* NOTE(review): the return-type line, braces and the final "return ret;"
     were dropped by extraction.  */
  bool ret = !plats->aggs_contain_variable;
  plats->aggs_contain_variable = true;
994 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice
&other
)
996 return meet_with_1 (&other
.m_vr
);
999 /* Meet the current value of the lattice with value range described by VR
1003 ipcp_vr_lattice::meet_with (const value_range
*p_vr
)
1005 return meet_with_1 (p_vr
);
/* Meet the current value of the lattice with value range described by
   OTHER_VR lattice.  Return TRUE if anything changed.  */

ipcp_vr_lattice::meet_with_1 (const value_range *other_vr)
  /* NOTE(review): the return-type line, braces and the leading early-return
     guards (for bottom/undefined states) were dropped by extraction.  */
  if (other_vr->varying_p ())
    return set_to_bottom ();

  /* Union the incoming range into ours and report whether it grew.  */
  value_range save (m_vr);
  m_vr.union_ (other_vr);
  return !m_vr.equal_p (save);
1025 /* Return true if value range information in the lattice is yet unknown. */
1028 ipcp_vr_lattice::top_p () const
1030 return m_vr
.undefined_p ();
1033 /* Return true if value range information in the lattice is known to be
1037 ipcp_vr_lattice::bottom_p () const
1039 return m_vr
.varying_p ();
/* Set value range information in the lattice to bottom.  Return true if it
   previously was in a different state.  */

ipcp_vr_lattice::set_to_bottom ()
  /* NOTE(review): the return-type line, braces and the return statements
     were dropped by extraction.  */
  if (m_vr.varying_p ())
  /* ?? We create all sorts of VARYING ranges for floats, structures,
     and other types which we cannot handle as ranges.  We should
     probably avoid handling them throughout the pass, but it's easier
     to create a sensible VARYING here and let the lattice
     propagate (comment truncated in extraction).  */
  m_vr.set_varying (integer_type_node);
/* Set lattice value to bottom, if it already isn't the case.  */

ipcp_bits_lattice::set_to_bottom ()
  /* NOTE(review): the return-type line, braces, the already-bottom guard and
     the return statements were dropped by extraction.  */
  m_lattice_val = IPA_BITS_VARYING;
/* Set to constant if it isn't already.  Only meant to be called
   when switching state from TOP.  */

ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask)
  /* NOTE(review): the return-type line, braces, the storing of MASK into
     m_mask and the final return were dropped by extraction.  */
  gcc_assert (top_p ());
  m_lattice_val = IPA_BITS_CONSTANT;
  /* Only bits not covered by MASK are recorded as known value bits.  */
  m_value = wi::bit_and (wi::bit_not (mask), value);
/* Return true if any of the known bits are non-zero.  */

ipcp_bits_lattice::known_nonzero_p () const
  /* NOTE(review): the return-type line, braces and the guard for the
     non-constant state were dropped by extraction.  */
  return wi::ne_p (wi::bit_and (wi::bit_not (m_mask), m_value), 0);
/* Convert operand to value, mask form.  */

ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep,
				       widest_int *maskp)
  /* NOTE(review): the return-type line, braces, the mask assignment for the
     INTEGER_CST case and the SSA_NAME/else handling were dropped by
     extraction.  */
  wide_int get_nonzero_bits (const_tree);

  if (TREE_CODE (operand) == INTEGER_CST)
      *valuep = wi::to_widest (operand);
/* Meet operation, similar to ccp_lattice_meet, we xor values
   if this->value, value have different values at same bit positions, we want
   to drop that bit to varying.  Return true if mask is changed.
   This function assumes that the lattice value is in CONSTANT state.  If
   DROP_ALL_ONES, mask out any known bits with value one afterwards.  */

ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask,
				unsigned precision, bool drop_all_ones)
  /* NOTE(review): the return-type line, braces, the DROP_ALL_ONES handling
     and the m_value masking were dropped by extraction.  */
  gcc_assert (constant_p ());

  /* A bit becomes unknown if it was unknown in either input or if the two
     known values disagree on it.  */
  widest_int old_mask = m_mask;
  m_mask = (m_mask | mask) | (m_value ^ value);

  /* All bits unknown within the precision means bottom.  */
  if (wi::sext (m_mask, precision) == -1)
    return set_to_bottom ();

  return m_mask != old_mask;
/* Meet the bits lattice with operand
   described by <value, mask, sgn, precision (comment truncated in
   extraction).  */

ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
  /* NOTE(review): the return-type line, the trailing "unsigned precision"
     parameter, braces, and the bottom/top state guards were dropped by
     extraction.  */
      /* From TOP: an all-unknown mask means bottom, otherwise become the
	 given constant.  */
      if (wi::sext (mask, precision) == -1)
	return set_to_bottom ();
      return set_to_constant (value, mask);

  return meet_with_1 (value, mask, precision, false);
1158 /* Meet bits lattice with the result of bit_value_binop (other, operand)
1159 if code is binary operation or bit_value_unop (other) if code is unary op.
1160 In the case when code is nop_expr, no adjustment is required. If
1161 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
1164 ipcp_bits_lattice::meet_with (ipcp_bits_lattice
& other
, unsigned precision
,
1165 signop sgn
, enum tree_code code
, tree operand
,
1168 if (other
.bottom_p ())
1169 return set_to_bottom ();
1171 if (bottom_p () || other
.top_p ())
1174 widest_int adjusted_value
, adjusted_mask
;
1176 if (TREE_CODE_CLASS (code
) == tcc_binary
)
1178 tree type
= TREE_TYPE (operand
);
1179 widest_int o_value
, o_mask
;
1180 get_value_and_mask (operand
, &o_value
, &o_mask
);
1182 bit_value_binop (code
, sgn
, precision
, &adjusted_value
, &adjusted_mask
,
1183 sgn
, precision
, other
.get_value (), other
.get_mask (),
1184 TYPE_SIGN (type
), TYPE_PRECISION (type
), o_value
, o_mask
);
1186 if (wi::sext (adjusted_mask
, precision
) == -1)
1187 return set_to_bottom ();
1190 else if (TREE_CODE_CLASS (code
) == tcc_unary
)
1192 bit_value_unop (code
, sgn
, precision
, &adjusted_value
,
1193 &adjusted_mask
, sgn
, precision
, other
.get_value (),
1196 if (wi::sext (adjusted_mask
, precision
) == -1)
1197 return set_to_bottom ();
1201 return set_to_bottom ();
1207 adjusted_mask
|= adjusted_value
;
1208 adjusted_value
&= ~adjusted_mask
;
1210 if (wi::sext (adjusted_mask
, precision
) == -1)
1211 return set_to_bottom ();
1212 return set_to_constant (adjusted_value
, adjusted_mask
);
1215 return meet_with_1 (adjusted_value
, adjusted_mask
, precision
,
1219 /* Mark bot aggregate and scalar lattices as containing an unknown variable,
1220 return true is any of them has not been marked as such so far. */
1223 set_all_contains_variable (class ipcp_param_lattices
*plats
)
1226 ret
= plats
->itself
.set_contains_variable ();
1227 ret
|= plats
->ctxlat
.set_contains_variable ();
1228 ret
|= set_agg_lats_contain_variable (plats
);
1229 ret
|= plats
->bits_lattice
.set_to_bottom ();
1230 ret
|= plats
->m_value_range
.set_to_bottom ();
1234 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1235 points to by the number of callers to NODE. */
1238 count_callers (cgraph_node
*node
, void *data
)
1240 int *caller_count
= (int *) data
;
1242 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
1243 /* Local thunks can be handled transparently, but if the thunk cannot
1244 be optimized out, count it as a real use. */
1245 if (!cs
->caller
->thunk
|| !cs
->caller
->local
)
1250 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1251 the one caller of some other node. Set the caller's corresponding flag. */
1254 set_single_call_flag (cgraph_node
*node
, void *)
1256 cgraph_edge
*cs
= node
->callers
;
1257 /* Local thunks can be handled transparently, skip them. */
1258 while (cs
&& cs
->caller
->thunk
&& cs
->caller
->local
)
1259 cs
= cs
->next_caller
;
1261 if (ipa_node_params
* info
= ipa_node_params_sum
->get (cs
->caller
))
1263 info
->node_calling_single_call
= true;
1269 /* Initialize ipcp_lattices. */
1272 initialize_node_lattices (struct cgraph_node
*node
)
1274 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1275 struct cgraph_edge
*ie
;
1276 bool disable
= false, variable
= false;
1279 gcc_checking_assert (node
->has_gimple_body_p ());
1281 if (!ipa_get_param_count (info
))
1283 else if (node
->local
)
1285 int caller_count
= 0;
1286 node
->call_for_symbol_thunks_and_aliases (count_callers
, &caller_count
,
1288 gcc_checking_assert (caller_count
> 0);
1289 if (caller_count
== 1)
1290 node
->call_for_symbol_thunks_and_aliases (set_single_call_flag
,
1295 /* When cloning is allowed, we can assume that externally visible
1296 functions are not called. We will compensate this by cloning
1298 if (ipcp_versionable_function_p (node
)
1299 && ipcp_cloning_candidate_p (node
))
1305 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1306 && !node
->alias
&& !node
->thunk
)
1308 fprintf (dump_file
, "Initializing lattices of %s\n",
1309 node
->dump_name ());
1310 if (disable
|| variable
)
1311 fprintf (dump_file
, " Marking all lattices as %s\n",
1312 disable
? "BOTTOM" : "VARIABLE");
1315 auto_vec
<bool, 16> surviving_params
;
1316 bool pre_modified
= false;
1318 clone_info
*cinfo
= clone_info::get (node
);
1320 if (!disable
&& cinfo
&& cinfo
->param_adjustments
)
1322 /* At the moment all IPA optimizations should use the number of
1323 parameters of the prevailing decl as the m_always_copy_start.
1324 Handling any other value would complicate the code below, so for the
1325 time bing let's only assert it is so. */
1326 gcc_assert ((cinfo
->param_adjustments
->m_always_copy_start
1327 == ipa_get_param_count (info
))
1328 || cinfo
->param_adjustments
->m_always_copy_start
< 0);
1330 pre_modified
= true;
1331 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
1333 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1334 && !node
->alias
&& !node
->thunk
)
1337 for (int j
= 0; j
< ipa_get_param_count (info
); j
++)
1339 if (j
< (int) surviving_params
.length ()
1340 && surviving_params
[j
])
1345 " The following parameters are dead on arrival:");
1348 fprintf (dump_file
, " %u", j
);
1351 fprintf (dump_file
, "\n");
1355 for (i
= 0; i
< ipa_get_param_count (info
); i
++)
1357 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
1359 || !ipa_get_type (info
, i
)
1360 || (pre_modified
&& (surviving_params
.length () <= (unsigned) i
1361 || !surviving_params
[i
])))
1363 plats
->itself
.set_to_bottom ();
1364 plats
->ctxlat
.set_to_bottom ();
1365 set_agg_lats_to_bottom (plats
);
1366 plats
->bits_lattice
.set_to_bottom ();
1367 plats
->m_value_range
.m_vr
= value_range ();
1368 plats
->m_value_range
.set_to_bottom ();
1372 plats
->m_value_range
.init ();
1374 set_all_contains_variable (plats
);
1378 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
1379 if (ie
->indirect_info
->polymorphic
1380 && ie
->indirect_info
->param_index
>= 0)
1382 gcc_checking_assert (ie
->indirect_info
->param_index
>= 0);
1383 ipa_get_parm_lattices (info
,
1384 ie
->indirect_info
->param_index
)->virt_call
= 1;
1388 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
1392 ipacp_value_safe_for_type (tree param_type
, tree value
)
1394 tree val_type
= TREE_TYPE (value
);
1395 if (param_type
== val_type
1396 || useless_type_conversion_p (param_type
, val_type
)
1397 || fold_convertible_p (param_type
, value
))
1403 /* Return true iff X and Y should be considered equal values by IPA-CP. */
1406 values_equal_for_ipcp_p (tree x
, tree y
)
1408 gcc_checking_assert (x
!= NULL_TREE
&& y
!= NULL_TREE
);
1413 if (TREE_CODE (x
) == ADDR_EXPR
1414 && TREE_CODE (y
) == ADDR_EXPR
1415 && TREE_CODE (TREE_OPERAND (x
, 0)) == CONST_DECL
1416 && TREE_CODE (TREE_OPERAND (y
, 0)) == CONST_DECL
)
1417 return operand_equal_p (DECL_INITIAL (TREE_OPERAND (x
, 0)),
1418 DECL_INITIAL (TREE_OPERAND (y
, 0)), 0);
1420 return operand_equal_p (x
, y
, 0);
1423 /* Return the result of a (possibly arithmetic) operation on the constant
1424 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1425 the type of the parameter to which the result is passed. Return
1426 NULL_TREE if that cannot be determined or be considered an
1427 interprocedural invariant. */
1430 ipa_get_jf_arith_result (enum tree_code opcode
, tree input
, tree operand
,
1435 if (opcode
== NOP_EXPR
)
1437 if (!is_gimple_ip_invariant (input
))
1440 if (opcode
== ASSERT_EXPR
)
1442 if (values_equal_for_ipcp_p (input
, operand
))
1450 if (TREE_CODE_CLASS (opcode
) == tcc_comparison
)
1451 res_type
= boolean_type_node
;
1452 else if (expr_type_first_operand_type_p (opcode
))
1453 res_type
= TREE_TYPE (input
);
1458 if (TREE_CODE_CLASS (opcode
) == tcc_unary
)
1459 res
= fold_unary (opcode
, res_type
, input
);
1461 res
= fold_binary (opcode
, res_type
, input
, operand
);
1463 if (res
&& !is_gimple_ip_invariant (res
))
1469 /* Return the result of a (possibly arithmetic) pass through jump function
1470 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1471 to which the result is passed. Return NULL_TREE if that cannot be
1472 determined or be considered an interprocedural invariant. */
1475 ipa_get_jf_pass_through_result (struct ipa_jump_func
*jfunc
, tree input
,
1478 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc
),
1480 ipa_get_jf_pass_through_operand (jfunc
),
1484 /* Return the result of an ancestor jump function JFUNC on the constant value
1485 INPUT. Return NULL_TREE if that cannot be determined. */
1488 ipa_get_jf_ancestor_result (struct ipa_jump_func
*jfunc
, tree input
)
1490 gcc_checking_assert (TREE_CODE (input
) != TREE_BINFO
);
1491 if (TREE_CODE (input
) == ADDR_EXPR
)
1493 gcc_checking_assert (is_gimple_ip_invariant_address (input
));
1494 poly_int64 off
= ipa_get_jf_ancestor_offset (jfunc
);
1495 if (known_eq (off
, 0))
1497 poly_int64 byte_offset
= exact_div (off
, BITS_PER_UNIT
);
1498 return build1 (ADDR_EXPR
, TREE_TYPE (input
),
1499 fold_build2 (MEM_REF
, TREE_TYPE (TREE_TYPE (input
)), input
,
1500 build_int_cst (ptr_type_node
, byte_offset
)));
1502 else if (ipa_get_jf_ancestor_keep_null (jfunc
)
1509 /* Determine whether JFUNC evaluates to a single known constant value and if
1510 so, return it. Otherwise return NULL. INFO describes the caller node or
1511 the one it is inlined to, so that pass-through jump functions can be
1512 evaluated. PARM_TYPE is the type of the parameter to which the result is
1516 ipa_value_from_jfunc (class ipa_node_params
*info
, struct ipa_jump_func
*jfunc
,
1519 if (jfunc
->type
== IPA_JF_CONST
)
1520 return ipa_get_jf_constant (jfunc
);
1521 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
1522 || jfunc
->type
== IPA_JF_ANCESTOR
)
1527 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1528 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1530 idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
1532 if (info
->ipcp_orig_node
)
1533 input
= info
->known_csts
[idx
];
1536 ipcp_lattice
<tree
> *lat
;
1539 || idx
>= ipa_get_param_count (info
))
1541 lat
= ipa_get_scalar_lat (info
, idx
);
1542 if (!lat
->is_single_const ())
1544 input
= lat
->values
->value
;
1550 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1551 return ipa_get_jf_pass_through_result (jfunc
, input
, parm_type
);
1553 return ipa_get_jf_ancestor_result (jfunc
, input
);
1559 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1560 that INFO describes the caller node or the one it is inlined to, CS is the
1561 call graph edge corresponding to JFUNC and CSIDX index of the described
1564 ipa_polymorphic_call_context
1565 ipa_context_from_jfunc (ipa_node_params
*info
, cgraph_edge
*cs
, int csidx
,
1566 ipa_jump_func
*jfunc
)
1568 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
1569 ipa_polymorphic_call_context ctx
;
1570 ipa_polymorphic_call_context
*edge_ctx
1571 = cs
? ipa_get_ith_polymorhic_call_context (args
, csidx
) : NULL
;
1573 if (edge_ctx
&& !edge_ctx
->useless_p ())
1576 if (jfunc
->type
== IPA_JF_PASS_THROUGH
1577 || jfunc
->type
== IPA_JF_ANCESTOR
)
1579 ipa_polymorphic_call_context srcctx
;
1581 bool type_preserved
= true;
1582 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1584 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
1586 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
1587 srcidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1591 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
1592 srcidx
= ipa_get_jf_ancestor_formal_id (jfunc
);
1594 if (info
->ipcp_orig_node
)
1596 if (info
->known_contexts
.exists ())
1597 srcctx
= info
->known_contexts
[srcidx
];
1602 || srcidx
>= ipa_get_param_count (info
))
1604 ipcp_lattice
<ipa_polymorphic_call_context
> *lat
;
1605 lat
= ipa_get_poly_ctx_lat (info
, srcidx
);
1606 if (!lat
->is_single_const ())
1608 srcctx
= lat
->values
->value
;
1610 if (srcctx
.useless_p ())
1612 if (jfunc
->type
== IPA_JF_ANCESTOR
)
1613 srcctx
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
1614 if (!type_preserved
)
1615 srcctx
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
1616 srcctx
.combine_with (ctx
);
1623 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1624 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1625 the result is a range or an anti-range. */
1628 ipa_vr_operation_and_type_effects (value_range
*dst_vr
,
1629 value_range
*src_vr
,
1630 enum tree_code operation
,
1631 tree dst_type
, tree src_type
)
1633 range_fold_unary_expr (dst_vr
, operation
, dst_type
, src_vr
, src_type
);
1634 if (dst_vr
->varying_p () || dst_vr
->undefined_p ())
1639 /* Determine value_range of JFUNC given that INFO describes the caller node or
1640 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1641 and PARM_TYPE of the parameter. */
1644 ipa_value_range_from_jfunc (ipa_node_params
*info
, cgraph_edge
*cs
,
1645 ipa_jump_func
*jfunc
, tree parm_type
)
1649 ipa_vr_operation_and_type_effects (&vr
,
1651 NOP_EXPR
, parm_type
,
1652 jfunc
->m_vr
->type ());
1653 if (vr
.singleton_p ())
1655 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1658 ipcp_transformation
*sum
1659 = ipcp_get_transformation_summary (cs
->caller
->inlined_to
1660 ? cs
->caller
->inlined_to
1662 if (!sum
|| !sum
->m_vr
)
1665 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1667 if (!(*sum
->m_vr
)[idx
].known
)
1669 tree vr_type
= ipa_get_type (info
, idx
);
1670 value_range
srcvr (wide_int_to_tree (vr_type
, (*sum
->m_vr
)[idx
].min
),
1671 wide_int_to_tree (vr_type
, (*sum
->m_vr
)[idx
].max
),
1672 (*sum
->m_vr
)[idx
].type
);
1674 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
1676 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
1680 if (ipa_vr_operation_and_type_effects (&res
,
1682 operation
, parm_type
,
1688 value_range op_res
, res
;
1689 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
1690 value_range
op_vr (op
, op
);
1692 range_fold_binary_expr (&op_res
, operation
, vr_type
, &srcvr
, &op_vr
);
1693 if (ipa_vr_operation_and_type_effects (&res
,
1695 NOP_EXPR
, parm_type
,
1703 /* See if NODE is a clone with a known aggregate value at a given OFFSET of a
1704 parameter with the given INDEX. */
1707 get_clone_agg_value (struct cgraph_node
*node
, HOST_WIDE_INT offset
,
1710 struct ipa_agg_replacement_value
*aggval
;
1712 aggval
= ipa_get_agg_replacements_for_node (node
);
1715 if (aggval
->offset
== offset
1716 && aggval
->index
== index
)
1717 return aggval
->value
;
1718 aggval
= aggval
->next
;
1723 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1724 single known constant value and if so, return it. Otherwise return NULL.
1725 NODE and INFO describes the caller node or the one it is inlined to, and
1726 its related info. */
1729 ipa_agg_value_from_node (class ipa_node_params
*info
,
1730 struct cgraph_node
*node
,
1731 struct ipa_agg_jf_item
*item
)
1733 tree value
= NULL_TREE
;
1736 if (item
->offset
< 0 || item
->jftype
== IPA_JF_UNKNOWN
)
1739 if (item
->jftype
== IPA_JF_CONST
)
1740 return item
->value
.constant
;
1742 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
1743 || item
->jftype
== IPA_JF_LOAD_AGG
);
1745 src_idx
= item
->value
.pass_through
.formal_id
;
1747 if (info
->ipcp_orig_node
)
1749 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
1750 value
= info
->known_csts
[src_idx
];
1752 value
= get_clone_agg_value (node
, item
->value
.load_agg
.offset
,
1755 else if (info
->lattices
)
1757 class ipcp_param_lattices
*src_plats
1758 = ipa_get_parm_lattices (info
, src_idx
);
1760 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
1762 struct ipcp_lattice
<tree
> *lat
= &src_plats
->itself
;
1764 if (!lat
->is_single_const ())
1767 value
= lat
->values
->value
;
1769 else if (src_plats
->aggs
1770 && !src_plats
->aggs_bottom
1771 && !src_plats
->aggs_contain_variable
1772 && src_plats
->aggs_by_ref
== item
->value
.load_agg
.by_ref
)
1774 struct ipcp_agg_lattice
*aglat
;
1776 for (aglat
= src_plats
->aggs
; aglat
; aglat
= aglat
->next
)
1778 if (aglat
->offset
> item
->value
.load_agg
.offset
)
1781 if (aglat
->offset
== item
->value
.load_agg
.offset
)
1783 if (aglat
->is_single_const ())
1784 value
= aglat
->values
->value
;
1794 if (item
->jftype
== IPA_JF_LOAD_AGG
)
1796 tree load_type
= item
->value
.load_agg
.type
;
1797 tree value_type
= TREE_TYPE (value
);
1799 /* Ensure value type is compatible with load type. */
1800 if (!useless_type_conversion_p (load_type
, value_type
))
1804 return ipa_get_jf_arith_result (item
->value
.pass_through
.operation
,
1806 item
->value
.pass_through
.operand
,
1810 /* Determine whether AGG_JFUNC evaluates to a set of known constant value for
1811 an aggregate and if so, return it. Otherwise return an empty set. NODE
1812 and INFO describes the caller node or the one it is inlined to, and its
1815 struct ipa_agg_value_set
1816 ipa_agg_value_set_from_jfunc (class ipa_node_params
*info
, cgraph_node
*node
,
1817 struct ipa_agg_jump_function
*agg_jfunc
)
1819 struct ipa_agg_value_set agg
;
1820 struct ipa_agg_jf_item
*item
;
1824 agg
.by_ref
= agg_jfunc
->by_ref
;
1826 FOR_EACH_VEC_SAFE_ELT (agg_jfunc
->items
, i
, item
)
1828 tree value
= ipa_agg_value_from_node (info
, node
, item
);
1832 struct ipa_agg_value value_item
;
1834 value_item
.offset
= item
->offset
;
1835 value_item
.value
= value
;
1837 agg
.items
.safe_push (value_item
);
1843 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1844 bottom, not containing a variable component and without any known value at
1848 ipcp_verify_propagated_values (void)
1850 struct cgraph_node
*node
;
1852 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
1854 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1855 if (!opt_for_fn (node
->decl
, flag_ipa_cp
)
1856 || !opt_for_fn (node
->decl
, optimize
))
1858 int i
, count
= ipa_get_param_count (info
);
1860 for (i
= 0; i
< count
; i
++)
1862 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (info
, i
);
1865 && !lat
->contains_variable
1866 && lat
->values_count
== 0)
1870 symtab
->dump (dump_file
);
1871 fprintf (dump_file
, "\nIPA lattices after constant "
1872 "propagation, before gcc_unreachable:\n");
1873 print_all_lattices (dump_file
, true, false);
1882 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
1885 values_equal_for_ipcp_p (ipa_polymorphic_call_context x
,
1886 ipa_polymorphic_call_context y
)
1888 return x
.equal_to (y
);
1892 /* Add a new value source to the value represented by THIS, marking that a
1893 value comes from edge CS and (if the underlying jump function is a
1894 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1895 parameter described by SRC_INDEX. OFFSET is negative if the source was the
1896 scalar value of the parameter itself or the offset within an aggregate. */
1898 template <typename valtype
>
1900 ipcp_value
<valtype
>::add_source (cgraph_edge
*cs
, ipcp_value
*src_val
,
1901 int src_idx
, HOST_WIDE_INT offset
)
1903 ipcp_value_source
<valtype
> *src
;
1905 src
= new (ipcp_sources_pool
.allocate ()) ipcp_value_source
<valtype
>;
1906 src
->offset
= offset
;
1909 src
->index
= src_idx
;
1911 src
->next
= sources
;
1915 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1916 SOURCE and clear all other fields. */
1918 static ipcp_value
<tree
> *
1919 allocate_and_init_ipcp_value (tree cst
, unsigned same_lat_gen_level
)
1921 ipcp_value
<tree
> *val
;
1923 val
= new (ipcp_cst_values_pool
.allocate ()) ipcp_value
<tree
>();
1925 val
->self_recursion_generated_level
= same_lat_gen_level
;
1929 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1930 value to SOURCE and clear all other fields. */
1932 static ipcp_value
<ipa_polymorphic_call_context
> *
1933 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx
,
1934 unsigned same_lat_gen_level
)
1936 ipcp_value
<ipa_polymorphic_call_context
> *val
;
1938 val
= new (ipcp_poly_ctx_values_pool
.allocate ())
1939 ipcp_value
<ipa_polymorphic_call_context
>();
1941 val
->self_recursion_generated_level
= same_lat_gen_level
;
1945 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1946 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
1947 meaning. OFFSET -1 means the source is scalar and not a part of an
1948 aggregate. If non-NULL, VAL_P records address of existing or newly added
1951 If the value is generated for a self-recursive call as a result of an
1952 arithmetic pass-through jump-function acting on a value in the same lattice,
1953 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
1954 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
1956 template <typename valtype
>
1958 ipcp_lattice
<valtype
>::add_value (valtype newval
, cgraph_edge
*cs
,
1959 ipcp_value
<valtype
> *src_val
,
1960 int src_idx
, HOST_WIDE_INT offset
,
1961 ipcp_value
<valtype
> **val_p
,
1962 unsigned same_lat_gen_level
)
1964 ipcp_value
<valtype
> *val
, *last_val
= NULL
;
1972 for (val
= values
; val
; last_val
= val
, val
= val
->next
)
1973 if (values_equal_for_ipcp_p (val
->value
, newval
))
1978 if (val
->self_recursion_generated_level
< same_lat_gen_level
)
1979 val
->self_recursion_generated_level
= same_lat_gen_level
;
1981 if (ipa_edge_within_scc (cs
))
1983 ipcp_value_source
<valtype
> *s
;
1984 for (s
= val
->sources
; s
; s
= s
->next
)
1985 if (s
->cs
== cs
&& s
->val
== src_val
)
1991 val
->add_source (cs
, src_val
, src_idx
, offset
);
1995 if (!same_lat_gen_level
&& values_count
== opt_for_fn (cs
->caller
->decl
,
1996 param_ipa_cp_value_list_size
))
1998 /* We can only free sources, not the values themselves, because sources
1999 of other values in this SCC might point to them. */
2000 for (val
= values
; val
; val
= val
->next
)
2002 while (val
->sources
)
2004 ipcp_value_source
<valtype
> *src
= val
->sources
;
2005 val
->sources
= src
->next
;
2006 ipcp_sources_pool
.remove ((ipcp_value_source
<tree
>*)src
);
2010 return set_to_bottom ();
2014 val
= allocate_and_init_ipcp_value (newval
, same_lat_gen_level
);
2015 val
->add_source (cs
, src_val
, src_idx
, offset
);
2018 /* Add the new value to end of value list, which can reduce iterations
2019 of propagation stage for recursive function. */
2021 last_val
->next
= val
;
2031 /* A helper function that returns result of operation specified by OPCODE on
2032 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2033 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2034 acting as its second operand. If non-NULL, RES_TYPE is expected type of
2038 get_val_across_arith_op (enum tree_code opcode
,
2041 ipcp_value
<tree
> *src_val
,
2044 tree opnd1
= src_val
->value
;
2046 /* Skip source values that is incompatible with specified type. */
2048 && !useless_type_conversion_p (opnd1_type
, TREE_TYPE (opnd1
)))
2051 return ipa_get_jf_arith_result (opcode
, opnd1
, opnd2
, res_type
);
2054 /* Propagate values through an arithmetic transformation described by a jump
2055 function associated with edge CS, taking values from SRC_LAT and putting
2056 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2057 OPND2 is a constant value if transformation is a binary operation.
2058 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2059 a part of the aggregate. SRC_IDX is the index of the source parameter.
2060 RES_TYPE is the value type of result being propagated into. Return true if
2061 DEST_LAT changed. */
2064 propagate_vals_across_arith_jfunc (cgraph_edge
*cs
,
2065 enum tree_code opcode
,
2068 ipcp_lattice
<tree
> *src_lat
,
2069 ipcp_lattice
<tree
> *dest_lat
,
2070 HOST_WIDE_INT src_offset
,
2074 ipcp_value
<tree
> *src_val
;
2077 /* Due to circular dependencies, propagating within an SCC through arithmetic
2078 transformation would create infinite number of values. But for
2079 self-feeding recursive function, we could allow propagation in a limited
2080 count, and this can enable a simple kind of recursive function versioning.
2081 For other scenario, we would just make lattices bottom. */
2082 if (opcode
!= NOP_EXPR
&& ipa_edge_within_scc (cs
))
2086 int max_recursive_depth
= opt_for_fn(cs
->caller
->decl
,
2087 param_ipa_cp_max_recursive_depth
);
2088 if (src_lat
!= dest_lat
|| max_recursive_depth
< 1)
2089 return dest_lat
->set_contains_variable ();
2091 /* No benefit if recursive execution is in low probability. */
2092 if (cs
->sreal_frequency () * 100
2093 <= ((sreal
) 1) * opt_for_fn (cs
->caller
->decl
,
2094 param_ipa_cp_min_recursive_probability
))
2095 return dest_lat
->set_contains_variable ();
2097 auto_vec
<ipcp_value
<tree
> *, 8> val_seeds
;
2099 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2101 /* Now we do not use self-recursively generated value as propagation
2102 source, this is absolutely conservative, but could avoid explosion
2103 of lattice's value space, especially when one recursive function
2104 calls another recursive. */
2105 if (src_val
->self_recursion_generated_p ())
2107 ipcp_value_source
<tree
> *s
;
2109 /* If the lattice has already been propagated for the call site,
2110 no need to do that again. */
2111 for (s
= src_val
->sources
; s
; s
= s
->next
)
2113 return dest_lat
->set_contains_variable ();
2116 val_seeds
.safe_push (src_val
);
2119 gcc_assert ((int) val_seeds
.length () <= param_ipa_cp_value_list_size
);
2121 /* Recursively generate lattice values with a limited count. */
2122 FOR_EACH_VEC_ELT (val_seeds
, i
, src_val
)
2124 for (int j
= 1; j
< max_recursive_depth
; j
++)
2126 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2129 || !ipacp_value_safe_for_type (res_type
, cstval
))
2132 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2133 src_offset
, &src_val
, j
);
2134 gcc_checking_assert (src_val
);
2137 ret
|= dest_lat
->set_contains_variable ();
2140 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2142 /* Now we do not use self-recursively generated value as propagation
2143 source, otherwise it is easy to make value space of normal lattice
2145 if (src_val
->self_recursion_generated_p ())
2147 ret
|= dest_lat
->set_contains_variable ();
2151 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2154 && ipacp_value_safe_for_type (res_type
, cstval
))
2155 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2158 ret
|= dest_lat
->set_contains_variable ();
2164 /* Propagate values through a pass-through jump function JFUNC associated with
2165 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2166 is the index of the source parameter. PARM_TYPE is the type of the
2167 parameter to which the result is passed. */
2170 propagate_vals_across_pass_through (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2171 ipcp_lattice
<tree
> *src_lat
,
2172 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2175 return propagate_vals_across_arith_jfunc (cs
,
2176 ipa_get_jf_pass_through_operation (jfunc
),
2178 ipa_get_jf_pass_through_operand (jfunc
),
2179 src_lat
, dest_lat
, -1, src_idx
, parm_type
);
2182 /* Propagate values through an ancestor jump function JFUNC associated with
2183 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2184 is the index of the source parameter. */
2187 propagate_vals_across_ancestor (struct cgraph_edge
*cs
,
2188 struct ipa_jump_func
*jfunc
,
2189 ipcp_lattice
<tree
> *src_lat
,
2190 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2193 ipcp_value
<tree
> *src_val
;
2196 if (ipa_edge_within_scc (cs
))
2197 return dest_lat
->set_contains_variable ();
2199 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2201 tree t
= ipa_get_jf_ancestor_result (jfunc
, src_val
->value
);
2203 if (t
&& ipacp_value_safe_for_type (param_type
, t
))
2204 ret
|= dest_lat
->add_value (t
, cs
, src_val
, src_idx
);
2206 ret
|= dest_lat
->set_contains_variable ();
2212 /* Propagate scalar values across jump function JFUNC that is associated with
2213 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2214 parameter to which the result is passed. */
2217 propagate_scalar_across_jump_function (struct cgraph_edge
*cs
,
2218 struct ipa_jump_func
*jfunc
,
2219 ipcp_lattice
<tree
> *dest_lat
,
2222 if (dest_lat
->bottom
)
2225 if (jfunc
->type
== IPA_JF_CONST
)
2227 tree val
= ipa_get_jf_constant (jfunc
);
2228 if (ipacp_value_safe_for_type (param_type
, val
))
2229 return dest_lat
->add_value (val
, cs
, NULL
, 0);
2231 return dest_lat
->set_contains_variable ();
2233 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
2234 || jfunc
->type
== IPA_JF_ANCESTOR
)
2236 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2237 ipcp_lattice
<tree
> *src_lat
;
2241 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2242 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2244 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2246 src_lat
= ipa_get_scalar_lat (caller_info
, src_idx
);
2247 if (src_lat
->bottom
)
2248 return dest_lat
->set_contains_variable ();
2250 /* If we would need to clone the caller and cannot, do not propagate. */
2251 if (!ipcp_versionable_function_p (cs
->caller
)
2252 && (src_lat
->contains_variable
2253 || (src_lat
->values_count
> 1)))
2254 return dest_lat
->set_contains_variable ();
2256 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2257 ret
= propagate_vals_across_pass_through (cs
, jfunc
, src_lat
,
2261 ret
= propagate_vals_across_ancestor (cs
, jfunc
, src_lat
, dest_lat
,
2262 src_idx
, param_type
);
2264 if (src_lat
->contains_variable
)
2265 ret
|= dest_lat
->set_contains_variable ();
2270 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2271 use it for indirect inlining), we should propagate them too. */
2272 return dest_lat
->set_contains_variable ();
2275 /* Propagate scalar values across jump function JFUNC that is associated with
2276 edge CS and describes argument IDX and put the values into DEST_LAT. */
2279 propagate_context_across_jump_function (cgraph_edge
*cs
,
2280 ipa_jump_func
*jfunc
, int idx
,
2281 ipcp_lattice
<ipa_polymorphic_call_context
> *dest_lat
)
2283 if (dest_lat
->bottom
)
2285 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
2287 bool added_sth
= false;
2288 bool type_preserved
= true;
2290 ipa_polymorphic_call_context edge_ctx
, *edge_ctx_ptr
2291 = ipa_get_ith_polymorhic_call_context (args
, idx
);
2294 edge_ctx
= *edge_ctx_ptr
;
2296 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2297 || jfunc
->type
== IPA_JF_ANCESTOR
)
2299 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2301 ipcp_lattice
<ipa_polymorphic_call_context
> *src_lat
;
2303 /* TODO: Once we figure out how to propagate speculations, it will
2304 probably be a good idea to switch to speculation if type_preserved is
2305 not set instead of punting. */
2306 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2308 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
2310 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
2311 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2315 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
2316 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2319 src_lat
= ipa_get_poly_ctx_lat (caller_info
, src_idx
);
2320 /* If we would need to clone the caller and cannot, do not propagate. */
2321 if (!ipcp_versionable_function_p (cs
->caller
)
2322 && (src_lat
->contains_variable
2323 || (src_lat
->values_count
> 1)))
2326 ipcp_value
<ipa_polymorphic_call_context
> *src_val
;
2327 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2329 ipa_polymorphic_call_context cur
= src_val
->value
;
2331 if (!type_preserved
)
2332 cur
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
2333 if (jfunc
->type
== IPA_JF_ANCESTOR
)
2334 cur
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
2335 /* TODO: In cases we know how the context is going to be used,
2336 we can improve the result by passing proper OTR_TYPE. */
2337 cur
.combine_with (edge_ctx
);
2338 if (!cur
.useless_p ())
2340 if (src_lat
->contains_variable
2341 && !edge_ctx
.equal_to (cur
))
2342 ret
|= dest_lat
->set_contains_variable ();
2343 ret
|= dest_lat
->add_value (cur
, cs
, src_val
, src_idx
);
2352 if (!edge_ctx
.useless_p ())
2353 ret
|= dest_lat
->add_value (edge_ctx
, cs
);
2355 ret
|= dest_lat
->set_contains_variable ();
2361 /* Propagate bits across jfunc that is associated with
2362 edge cs and update dest_lattice accordingly. */
2365 propagate_bits_across_jump_function (cgraph_edge
*cs
, int idx
,
2366 ipa_jump_func
*jfunc
,
2367 ipcp_bits_lattice
*dest_lattice
)
2369 if (dest_lattice
->bottom_p ())
2372 enum availability availability
;
2373 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
2374 ipa_node_params
*callee_info
= ipa_node_params_sum
->get (callee
);
2375 tree parm_type
= ipa_get_type (callee_info
, idx
);
2377 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2378 transform for these cases. Similarly, we can have bad type mismatches
2379 with LTO, avoid doing anything with those too. */
2381 || (!INTEGRAL_TYPE_P (parm_type
) && !POINTER_TYPE_P (parm_type
)))
2383 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2384 fprintf (dump_file
, "Setting dest_lattice to bottom, because type of "
2385 "param %i of %s is NULL or unsuitable for bits propagation\n",
2386 idx
, cs
->callee
->dump_name ());
2388 return dest_lattice
->set_to_bottom ();
2391 unsigned precision
= TYPE_PRECISION (parm_type
);
2392 signop sgn
= TYPE_SIGN (parm_type
);
2394 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2395 || jfunc
->type
== IPA_JF_ANCESTOR
)
2397 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2398 tree operand
= NULL_TREE
;
2399 enum tree_code code
;
2401 bool keep_null
= false;
2403 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2405 code
= ipa_get_jf_pass_through_operation (jfunc
);
2406 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2407 if (code
!= NOP_EXPR
)
2408 operand
= ipa_get_jf_pass_through_operand (jfunc
);
2412 code
= POINTER_PLUS_EXPR
;
2413 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2414 unsigned HOST_WIDE_INT offset
2415 = ipa_get_jf_ancestor_offset (jfunc
) / BITS_PER_UNIT
;
2416 keep_null
= (ipa_get_jf_ancestor_keep_null (jfunc
) || !offset
);
2417 operand
= build_int_cstu (size_type_node
, offset
);
2420 class ipcp_param_lattices
*src_lats
2421 = ipa_get_parm_lattices (caller_info
, src_idx
);
2423 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2429 Assume lattice for x is bottom, however we can still propagate
2430 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2431 and we store it in jump function during analysis stage. */
2433 if (!src_lats
->bits_lattice
.bottom_p ())
2436 = keep_null
&& !src_lats
->bits_lattice
.known_nonzero_p ();
2438 return dest_lattice
->meet_with (src_lats
->bits_lattice
, precision
,
2439 sgn
, code
, operand
, drop_all_ones
);
2444 return dest_lattice
->meet_with (jfunc
->bits
->value
, jfunc
->bits
->mask
,
2447 return dest_lattice
->set_to_bottom ();
2450 /* Propagate value range across jump function JFUNC that is associated with
2451 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2455 propagate_vr_across_jump_function (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2456 class ipcp_param_lattices
*dest_plats
,
2459 ipcp_vr_lattice
*dest_lat
= &dest_plats
->m_value_range
;
2461 if (dest_lat
->bottom_p ())
2465 || (!INTEGRAL_TYPE_P (param_type
)
2466 && !POINTER_TYPE_P (param_type
)))
2467 return dest_lat
->set_to_bottom ();
2469 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2471 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
2472 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2473 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2474 class ipcp_param_lattices
*src_lats
2475 = ipa_get_parm_lattices (caller_info
, src_idx
);
2476 tree operand_type
= ipa_get_type (caller_info
, src_idx
);
2478 if (src_lats
->m_value_range
.bottom_p ())
2479 return dest_lat
->set_to_bottom ();
2482 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
2483 ipa_vr_operation_and_type_effects (&vr
,
2484 &src_lats
->m_value_range
.m_vr
,
2485 operation
, param_type
,
2487 /* A crude way to prevent unbounded number of value range updates
2488 in SCC components. We should allow limited number of updates within
2490 else if (!ipa_edge_within_scc (cs
))
2492 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
2493 value_range
op_vr (op
, op
);
2494 value_range op_res
,res
;
2496 range_fold_binary_expr (&op_res
, operation
, operand_type
,
2497 &src_lats
->m_value_range
.m_vr
, &op_vr
);
2498 ipa_vr_operation_and_type_effects (&vr
,
2500 NOP_EXPR
, param_type
,
2503 if (!vr
.undefined_p () && !vr
.varying_p ())
2508 if (ipa_vr_operation_and_type_effects (&jvr
, jfunc
->m_vr
,
2511 jfunc
->m_vr
->type ()))
2514 return dest_lat
->meet_with (&vr
);
2517 else if (jfunc
->type
== IPA_JF_CONST
)
2519 tree val
= ipa_get_jf_constant (jfunc
);
2520 if (TREE_CODE (val
) == INTEGER_CST
)
2522 val
= fold_convert (param_type
, val
);
2523 if (TREE_OVERFLOW_P (val
))
2524 val
= drop_tree_overflow (val
);
2526 value_range
tmpvr (val
, val
);
2527 return dest_lat
->meet_with (&tmpvr
);
2533 && ipa_vr_operation_and_type_effects (&vr
, jfunc
->m_vr
, NOP_EXPR
,
2535 jfunc
->m_vr
->type ()))
2536 return dest_lat
->meet_with (&vr
);
2538 return dest_lat
->set_to_bottom ();
2541 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2542 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2543 other cases, return false). If there are no aggregate items, set
2544 aggs_by_ref to NEW_AGGS_BY_REF. */
2547 set_check_aggs_by_ref (class ipcp_param_lattices
*dest_plats
,
2548 bool new_aggs_by_ref
)
2550 if (dest_plats
->aggs
)
2552 if (dest_plats
->aggs_by_ref
!= new_aggs_by_ref
)
2554 set_agg_lats_to_bottom (dest_plats
);
2559 dest_plats
->aggs_by_ref
= new_aggs_by_ref
;
2563 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2564 already existing lattice for the given OFFSET and SIZE, marking all skipped
2565 lattices as containing variable and checking for overlaps. If there is no
2566 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2567 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2568 unless there are too many already. If there are two many, return false. If
2569 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2570 skipped lattices were newly marked as containing variable, set *CHANGE to
2571 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2574 merge_agg_lats_step (class ipcp_param_lattices
*dest_plats
,
2575 HOST_WIDE_INT offset
, HOST_WIDE_INT val_size
,
2576 struct ipcp_agg_lattice
***aglat
,
2577 bool pre_existing
, bool *change
, int max_agg_items
)
2579 gcc_checking_assert (offset
>= 0);
2581 while (**aglat
&& (**aglat
)->offset
< offset
)
2583 if ((**aglat
)->offset
+ (**aglat
)->size
> offset
)
2585 set_agg_lats_to_bottom (dest_plats
);
2588 *change
|= (**aglat
)->set_contains_variable ();
2589 *aglat
= &(**aglat
)->next
;
2592 if (**aglat
&& (**aglat
)->offset
== offset
)
2594 if ((**aglat
)->size
!= val_size
)
2596 set_agg_lats_to_bottom (dest_plats
);
2599 gcc_assert (!(**aglat
)->next
2600 || (**aglat
)->next
->offset
>= offset
+ val_size
);
2605 struct ipcp_agg_lattice
*new_al
;
2607 if (**aglat
&& (**aglat
)->offset
< offset
+ val_size
)
2609 set_agg_lats_to_bottom (dest_plats
);
2612 if (dest_plats
->aggs_count
== max_agg_items
)
2614 dest_plats
->aggs_count
++;
2615 new_al
= ipcp_agg_lattice_pool
.allocate ();
2616 memset (new_al
, 0, sizeof (*new_al
));
2618 new_al
->offset
= offset
;
2619 new_al
->size
= val_size
;
2620 new_al
->contains_variable
= pre_existing
;
2622 new_al
->next
= **aglat
;
2628 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2629 containing an unknown value. */
2632 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice
*aglat
)
2637 ret
|= aglat
->set_contains_variable ();
2638 aglat
= aglat
->next
;
2643 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2644 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2645 parameter used for lattice value sources. Return true if DEST_PLATS changed
2649 merge_aggregate_lattices (struct cgraph_edge
*cs
,
2650 class ipcp_param_lattices
*dest_plats
,
2651 class ipcp_param_lattices
*src_plats
,
2652 int src_idx
, HOST_WIDE_INT offset_delta
)
2654 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2655 struct ipcp_agg_lattice
**dst_aglat
;
2658 if (set_check_aggs_by_ref (dest_plats
, src_plats
->aggs_by_ref
))
2660 if (src_plats
->aggs_bottom
)
2661 return set_agg_lats_contain_variable (dest_plats
);
2662 if (src_plats
->aggs_contain_variable
)
2663 ret
|= set_agg_lats_contain_variable (dest_plats
);
2664 dst_aglat
= &dest_plats
->aggs
;
2666 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2667 param_ipa_max_agg_items
);
2668 for (struct ipcp_agg_lattice
*src_aglat
= src_plats
->aggs
;
2670 src_aglat
= src_aglat
->next
)
2672 HOST_WIDE_INT new_offset
= src_aglat
->offset
- offset_delta
;
2676 if (merge_agg_lats_step (dest_plats
, new_offset
, src_aglat
->size
,
2677 &dst_aglat
, pre_existing
, &ret
, max_agg_items
))
2679 struct ipcp_agg_lattice
*new_al
= *dst_aglat
;
2681 dst_aglat
= &(*dst_aglat
)->next
;
2682 if (src_aglat
->bottom
)
2684 ret
|= new_al
->set_contains_variable ();
2687 if (src_aglat
->contains_variable
)
2688 ret
|= new_al
->set_contains_variable ();
2689 for (ipcp_value
<tree
> *val
= src_aglat
->values
;
2692 ret
|= new_al
->add_value (val
->value
, cs
, val
, src_idx
,
2695 else if (dest_plats
->aggs_bottom
)
2698 ret
|= set_chain_of_aglats_contains_variable (*dst_aglat
);
2702 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2703 pass-through JFUNC and if so, whether it has conform and conforms to the
2704 rules about propagating values passed by reference. */
2707 agg_pass_through_permissible_p (class ipcp_param_lattices
*src_plats
,
2708 struct ipa_jump_func
*jfunc
)
2710 return src_plats
->aggs
2711 && (!src_plats
->aggs_by_ref
2712 || ipa_get_jf_pass_through_agg_preserved (jfunc
));
2715 /* Propagate values through ITEM, jump function for a part of an aggregate,
2716 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2717 associated with the jump function. Return true if AGLAT changed in any
2721 propagate_aggregate_lattice (struct cgraph_edge
*cs
,
2722 struct ipa_agg_jf_item
*item
,
2723 struct ipcp_agg_lattice
*aglat
)
2725 class ipa_node_params
*caller_info
;
2726 class ipcp_param_lattices
*src_plats
;
2727 struct ipcp_lattice
<tree
> *src_lat
;
2728 HOST_WIDE_INT src_offset
;
2733 if (item
->jftype
== IPA_JF_CONST
)
2735 tree value
= item
->value
.constant
;
2737 gcc_checking_assert (is_gimple_ip_invariant (value
));
2738 return aglat
->add_value (value
, cs
, NULL
, 0);
2741 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
2742 || item
->jftype
== IPA_JF_LOAD_AGG
);
2744 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2745 src_idx
= item
->value
.pass_through
.formal_id
;
2746 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2748 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
2750 load_type
= NULL_TREE
;
2751 src_lat
= &src_plats
->itself
;
2756 HOST_WIDE_INT load_offset
= item
->value
.load_agg
.offset
;
2757 struct ipcp_agg_lattice
*src_aglat
;
2759 for (src_aglat
= src_plats
->aggs
; src_aglat
; src_aglat
= src_aglat
->next
)
2760 if (src_aglat
->offset
>= load_offset
)
2763 load_type
= item
->value
.load_agg
.type
;
2765 || src_aglat
->offset
> load_offset
2766 || src_aglat
->size
!= tree_to_shwi (TYPE_SIZE (load_type
))
2767 || src_plats
->aggs_by_ref
!= item
->value
.load_agg
.by_ref
)
2768 return aglat
->set_contains_variable ();
2770 src_lat
= src_aglat
;
2771 src_offset
= load_offset
;
2775 || (!ipcp_versionable_function_p (cs
->caller
)
2776 && !src_lat
->is_single_const ()))
2777 return aglat
->set_contains_variable ();
2779 ret
= propagate_vals_across_arith_jfunc (cs
,
2780 item
->value
.pass_through
.operation
,
2782 item
->value
.pass_through
.operand
,
2788 if (src_lat
->contains_variable
)
2789 ret
|= aglat
->set_contains_variable ();
2794 /* Propagate scalar values across jump function JFUNC that is associated with
2795 edge CS and put the values into DEST_LAT. */
2798 propagate_aggs_across_jump_function (struct cgraph_edge
*cs
,
2799 struct ipa_jump_func
*jfunc
,
2800 class ipcp_param_lattices
*dest_plats
)
2804 if (dest_plats
->aggs_bottom
)
2807 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2808 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
2810 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2811 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2812 class ipcp_param_lattices
*src_plats
;
2814 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2815 if (agg_pass_through_permissible_p (src_plats
, jfunc
))
2817 /* Currently we do not produce clobber aggregate jump
2818 functions, replace with merging when we do. */
2819 gcc_assert (!jfunc
->agg
.items
);
2820 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
,
2825 else if (jfunc
->type
== IPA_JF_ANCESTOR
2826 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
2828 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2829 int src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2830 class ipcp_param_lattices
*src_plats
;
2832 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2833 if (src_plats
->aggs
&& src_plats
->aggs_by_ref
)
2835 /* Currently we do not produce clobber aggregate jump
2836 functions, replace with merging when we do. */
2837 gcc_assert (!jfunc
->agg
.items
);
2838 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
, src_idx
,
2839 ipa_get_jf_ancestor_offset (jfunc
));
2841 else if (!src_plats
->aggs_by_ref
)
2842 ret
|= set_agg_lats_to_bottom (dest_plats
);
2844 ret
|= set_agg_lats_contain_variable (dest_plats
);
2848 if (jfunc
->agg
.items
)
2850 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2851 struct ipcp_agg_lattice
**aglat
= &dest_plats
->aggs
;
2852 struct ipa_agg_jf_item
*item
;
2855 if (set_check_aggs_by_ref (dest_plats
, jfunc
->agg
.by_ref
))
2858 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2859 param_ipa_max_agg_items
);
2860 FOR_EACH_VEC_ELT (*jfunc
->agg
.items
, i
, item
)
2862 HOST_WIDE_INT val_size
;
2864 if (item
->offset
< 0 || item
->jftype
== IPA_JF_UNKNOWN
)
2866 val_size
= tree_to_shwi (TYPE_SIZE (item
->type
));
2868 if (merge_agg_lats_step (dest_plats
, item
->offset
, val_size
,
2869 &aglat
, pre_existing
, &ret
, max_agg_items
))
2871 ret
|= propagate_aggregate_lattice (cs
, item
, *aglat
);
2872 aglat
= &(*aglat
)->next
;
2874 else if (dest_plats
->aggs_bottom
)
2878 ret
|= set_chain_of_aglats_contains_variable (*aglat
);
2881 ret
|= set_agg_lats_contain_variable (dest_plats
);
2886 /* Return true if on the way cfrom CS->caller to the final (non-alias and
2887 non-thunk) destination, the call passes through a thunk. */
2890 call_passes_through_thunk (cgraph_edge
*cs
)
2892 cgraph_node
*alias_or_thunk
= cs
->callee
;
2893 while (alias_or_thunk
->alias
)
2894 alias_or_thunk
= alias_or_thunk
->get_alias_target ();
2895 return alias_or_thunk
->thunk
;
2898 /* Propagate constants from the caller to the callee of CS. INFO describes the
2902 propagate_constants_across_call (struct cgraph_edge
*cs
)
2904 class ipa_node_params
*callee_info
;
2905 enum availability availability
;
2906 cgraph_node
*callee
;
2907 class ipa_edge_args
*args
;
2909 int i
, args_count
, parms_count
;
2911 callee
= cs
->callee
->function_symbol (&availability
);
2912 if (!callee
->definition
)
2914 gcc_checking_assert (callee
->has_gimple_body_p ());
2915 callee_info
= ipa_node_params_sum
->get (callee
);
2919 args
= ipa_edge_args_sum
->get (cs
);
2920 parms_count
= ipa_get_param_count (callee_info
);
2921 if (parms_count
== 0)
2924 || !opt_for_fn (cs
->caller
->decl
, flag_ipa_cp
)
2925 || !opt_for_fn (cs
->caller
->decl
, optimize
))
2927 for (i
= 0; i
< parms_count
; i
++)
2928 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
2932 args_count
= ipa_get_cs_argument_count (args
);
2934 /* If this call goes through a thunk we must not propagate to the first (0th)
2935 parameter. However, we might need to uncover a thunk from below a series
2936 of aliases first. */
2937 if (call_passes_through_thunk (cs
))
2939 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
2946 for (; (i
< args_count
) && (i
< parms_count
); i
++)
2948 struct ipa_jump_func
*jump_func
= ipa_get_ith_jump_func (args
, i
);
2949 class ipcp_param_lattices
*dest_plats
;
2950 tree param_type
= ipa_get_type (callee_info
, i
);
2952 dest_plats
= ipa_get_parm_lattices (callee_info
, i
);
2953 if (availability
== AVAIL_INTERPOSABLE
)
2954 ret
|= set_all_contains_variable (dest_plats
);
2957 ret
|= propagate_scalar_across_jump_function (cs
, jump_func
,
2958 &dest_plats
->itself
,
2960 ret
|= propagate_context_across_jump_function (cs
, jump_func
, i
,
2961 &dest_plats
->ctxlat
);
2963 |= propagate_bits_across_jump_function (cs
, i
, jump_func
,
2964 &dest_plats
->bits_lattice
);
2965 ret
|= propagate_aggs_across_jump_function (cs
, jump_func
,
2967 if (opt_for_fn (callee
->decl
, flag_ipa_vrp
))
2968 ret
|= propagate_vr_across_jump_function (cs
, jump_func
,
2969 dest_plats
, param_type
);
2971 ret
|= dest_plats
->m_value_range
.set_to_bottom ();
2974 for (; i
< parms_count
; i
++)
2975 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
, i
));
2980 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
2981 KNOWN_CONTEXTS, KNOWN_AGGS or AGG_REPS return the destination. The latter
2982 three can be NULL. If AGG_REPS is not NULL, KNOWN_AGGS is ignored. */
2985 ipa_get_indirect_edge_target_1 (struct cgraph_edge
*ie
,
2986 const vec
<tree
> &known_csts
,
2987 const vec
<ipa_polymorphic_call_context
> &known_contexts
,
2988 const vec
<ipa_agg_value_set
> &known_aggs
,
2989 struct ipa_agg_replacement_value
*agg_reps
,
2992 int param_index
= ie
->indirect_info
->param_index
;
2993 HOST_WIDE_INT anc_offset
;
2997 *speculative
= false;
2999 if (param_index
== -1)
3002 if (!ie
->indirect_info
->polymorphic
)
3006 if (ie
->indirect_info
->agg_contents
)
3009 if (agg_reps
&& ie
->indirect_info
->guaranteed_unmodified
)
3013 if (agg_reps
->index
== param_index
3014 && agg_reps
->offset
== ie
->indirect_info
->offset
3015 && agg_reps
->by_ref
== ie
->indirect_info
->by_ref
)
3017 t
= agg_reps
->value
;
3020 agg_reps
= agg_reps
->next
;
3025 const ipa_agg_value_set
*agg
;
3026 if (known_aggs
.length () > (unsigned int) param_index
)
3027 agg
= &known_aggs
[param_index
];
3030 bool from_global_constant
;
3031 t
= ipa_find_agg_cst_for_param (agg
,
3032 (unsigned) param_index
3033 < known_csts
.length ()
3034 ? known_csts
[param_index
]
3036 ie
->indirect_info
->offset
,
3037 ie
->indirect_info
->by_ref
,
3038 &from_global_constant
);
3040 && !from_global_constant
3041 && !ie
->indirect_info
->guaranteed_unmodified
)
3045 else if ((unsigned) param_index
< known_csts
.length ())
3046 t
= known_csts
[param_index
];
3049 && TREE_CODE (t
) == ADDR_EXPR
3050 && TREE_CODE (TREE_OPERAND (t
, 0)) == FUNCTION_DECL
)
3051 return TREE_OPERAND (t
, 0);
3056 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize
))
3059 gcc_assert (!ie
->indirect_info
->agg_contents
);
3060 anc_offset
= ie
->indirect_info
->offset
;
3064 /* Try to work out value of virtual table pointer value in replacements. */
3065 if (!t
&& agg_reps
&& !ie
->indirect_info
->by_ref
)
3069 if (agg_reps
->index
== param_index
3070 && agg_reps
->offset
== ie
->indirect_info
->offset
3071 && agg_reps
->by_ref
)
3073 t
= agg_reps
->value
;
3076 agg_reps
= agg_reps
->next
;
3080 /* Try to work out value of virtual table pointer value in known
3081 aggregate values. */
3082 if (!t
&& known_aggs
.length () > (unsigned int) param_index
3083 && !ie
->indirect_info
->by_ref
)
3085 const ipa_agg_value_set
*agg
= &known_aggs
[param_index
];
3086 t
= ipa_find_agg_cst_for_param (agg
,
3087 (unsigned) param_index
3088 < known_csts
.length ()
3089 ? known_csts
[param_index
] : NULL
,
3090 ie
->indirect_info
->offset
, true);
3093 /* If we found the virtual table pointer, lookup the target. */
3097 unsigned HOST_WIDE_INT offset
;
3098 if (vtable_pointer_value_to_vtable (t
, &vtable
, &offset
))
3101 target
= gimple_get_virt_method_for_vtable (ie
->indirect_info
->otr_token
,
3102 vtable
, offset
, &can_refer
);
3106 || fndecl_built_in_p (target
, BUILT_IN_UNREACHABLE
)
3107 || !possible_polymorphic_call_target_p
3108 (ie
, cgraph_node::get (target
)))
3110 /* Do not speculate builtin_unreachable, it is stupid! */
3111 if (ie
->indirect_info
->vptr_changed
)
3113 target
= ipa_impossible_devirt_target (ie
, target
);
3115 *speculative
= ie
->indirect_info
->vptr_changed
;
3122 /* Do we know the constant value of pointer? */
3123 if (!t
&& (unsigned) param_index
< known_csts
.length ())
3124 t
= known_csts
[param_index
];
3126 gcc_checking_assert (!t
|| TREE_CODE (t
) != TREE_BINFO
);
3128 ipa_polymorphic_call_context context
;
3129 if (known_contexts
.length () > (unsigned int) param_index
)
3131 context
= known_contexts
[param_index
];
3132 context
.offset_by (anc_offset
);
3133 if (ie
->indirect_info
->vptr_changed
)
3134 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3135 ie
->indirect_info
->otr_type
);
3138 ipa_polymorphic_call_context ctx2
= ipa_polymorphic_call_context
3139 (t
, ie
->indirect_info
->otr_type
, anc_offset
);
3140 if (!ctx2
.useless_p ())
3141 context
.combine_with (ctx2
, ie
->indirect_info
->otr_type
);
3146 context
= ipa_polymorphic_call_context (t
, ie
->indirect_info
->otr_type
,
3148 if (ie
->indirect_info
->vptr_changed
)
3149 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3150 ie
->indirect_info
->otr_type
);
3155 vec
<cgraph_node
*>targets
;
3158 targets
= possible_polymorphic_call_targets
3159 (ie
->indirect_info
->otr_type
,
3160 ie
->indirect_info
->otr_token
,
3162 if (!final
|| targets
.length () > 1)
3164 struct cgraph_node
*node
;
3167 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize_speculatively
)
3168 || ie
->speculative
|| !ie
->maybe_hot_p ())
3170 node
= try_speculative_devirtualization (ie
->indirect_info
->otr_type
,
3171 ie
->indirect_info
->otr_token
,
3175 *speculative
= true;
3176 target
= node
->decl
;
3183 *speculative
= false;
3184 if (targets
.length () == 1)
3185 target
= targets
[0]->decl
;
3187 target
= ipa_impossible_devirt_target (ie
, NULL_TREE
);
3190 if (target
&& !possible_polymorphic_call_target_p (ie
,
3191 cgraph_node::get (target
)))
3195 target
= ipa_impossible_devirt_target (ie
, target
);
3201 /* If an indirect edge IE can be turned into a direct one based on data in
3202 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3203 whether the discovered target is only speculative guess. */
3206 ipa_get_indirect_edge_target (struct cgraph_edge
*ie
,
3207 ipa_call_arg_values
*avals
,
3210 return ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3211 avals
->m_known_contexts
,
3212 avals
->m_known_aggs
,
3216 /* The same functionality as above overloaded for ipa_auto_call_arg_values. */
3219 ipa_get_indirect_edge_target (struct cgraph_edge
*ie
,
3220 ipa_auto_call_arg_values
*avals
,
3223 return ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3224 avals
->m_known_contexts
,
3225 avals
->m_known_aggs
,
3229 /* Calculate devirtualization time bonus for NODE, assuming we know information
3230 about arguments stored in AVALS. */
3233 devirtualization_time_bonus (struct cgraph_node
*node
,
3234 ipa_auto_call_arg_values
*avals
)
3236 struct cgraph_edge
*ie
;
3239 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
3241 struct cgraph_node
*callee
;
3242 class ipa_fn_summary
*isummary
;
3243 enum availability avail
;
3247 target
= ipa_get_indirect_edge_target (ie
, avals
, &speculative
);
3251 /* Only bare minimum benefit for clearly un-inlineable targets. */
3253 callee
= cgraph_node::get (target
);
3254 if (!callee
|| !callee
->definition
)
3256 callee
= callee
->function_symbol (&avail
);
3257 if (avail
< AVAIL_AVAILABLE
)
3259 isummary
= ipa_fn_summaries
->get (callee
);
3260 if (!isummary
|| !isummary
->inlinable
)
3263 int size
= ipa_size_summaries
->get (callee
)->size
;
3264 /* FIXME: The values below need re-considering and perhaps also
3265 integrating into the cost metrics, at lest in some very basic way. */
3266 int max_inline_insns_auto
3267 = opt_for_fn (callee
->decl
, param_max_inline_insns_auto
);
3268 if (size
<= max_inline_insns_auto
/ 4)
3269 res
+= 31 / ((int)speculative
+ 1);
3270 else if (size
<= max_inline_insns_auto
/ 2)
3271 res
+= 15 / ((int)speculative
+ 1);
3272 else if (size
<= max_inline_insns_auto
3273 || DECL_DECLARED_INLINE_P (callee
->decl
))
3274 res
+= 7 / ((int)speculative
+ 1);
3280 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3283 hint_time_bonus (cgraph_node
*node
, const ipa_call_estimates
&estimates
)
3286 ipa_hints hints
= estimates
.hints
;
3287 if (hints
& (INLINE_HINT_loop_iterations
| INLINE_HINT_loop_stride
))
3288 result
+= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3290 sreal bonus_for_one
= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3292 if (hints
& INLINE_HINT_loop_iterations
)
3293 result
+= (estimates
.loops_with_known_iterations
* bonus_for_one
).to_int ();
3295 if (hints
& INLINE_HINT_loop_stride
)
3296 result
+= (estimates
.loops_with_known_strides
* bonus_for_one
).to_int ();
3301 /* If there is a reason to penalize the function described by INFO in the
3302 cloning goodness evaluation, do so. */
3305 incorporate_penalties (cgraph_node
*node
, ipa_node_params
*info
,
3308 if (info
->node_within_scc
&& !info
->node_is_self_scc
)
3309 evaluation
= (evaluation
3310 * (100 - opt_for_fn (node
->decl
,
3311 param_ipa_cp_recursion_penalty
))) / 100;
3313 if (info
->node_calling_single_call
)
3314 evaluation
= (evaluation
3315 * (100 - opt_for_fn (node
->decl
,
3316 param_ipa_cp_single_call_penalty
)))
3322 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3323 and SIZE_COST and with the sum of frequencies of incoming edges to the
3324 potential new clone in FREQUENCIES. */
3327 good_cloning_opportunity_p (struct cgraph_node
*node
, sreal time_benefit
,
3328 sreal freq_sum
, profile_count count_sum
,
3331 if (time_benefit
== 0
3332 || !opt_for_fn (node
->decl
, flag_ipa_cp_clone
)
3333 || node
->optimize_for_size_p ())
3336 gcc_assert (size_cost
> 0);
3338 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3339 int eval_threshold
= opt_for_fn (node
->decl
, param_ipa_cp_eval_threshold
);
3340 if (count_sum
> profile_count::zero ())
3342 gcc_assert (base_count
> profile_count::zero ());
3343 sreal factor
= count_sum
.probability_in (base_count
).to_sreal ();
3344 sreal evaluation
= (time_benefit
* factor
) / size_cost
;
3345 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3348 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3350 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3351 "size: %i, count_sum: ", time_benefit
.to_double (),
3353 count_sum
.dump (dump_file
);
3354 fprintf (dump_file
, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3355 info
->node_within_scc
3356 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3357 info
->node_calling_single_call
? ", single_call" : "",
3358 evaluation
.to_double (), eval_threshold
);
3361 return evaluation
.to_int () >= eval_threshold
;
3365 sreal evaluation
= (time_benefit
* freq_sum
) / size_cost
;
3366 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3369 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3370 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3371 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3373 time_benefit
.to_double (), size_cost
, freq_sum
.to_double (),
3374 info
->node_within_scc
3375 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3376 info
->node_calling_single_call
? ", single_call" : "",
3377 evaluation
.to_double (), eval_threshold
);
3379 return evaluation
.to_int () >= eval_threshold
;
3383 /* Return all context independent values from aggregate lattices in PLATS in a
3384 vector. Return NULL if there are none. */
3386 static vec
<ipa_agg_value
>
3387 context_independent_aggregate_values (class ipcp_param_lattices
*plats
)
3389 vec
<ipa_agg_value
> res
= vNULL
;
3391 if (plats
->aggs_bottom
3392 || plats
->aggs_contain_variable
3393 || plats
->aggs_count
== 0)
3396 for (struct ipcp_agg_lattice
*aglat
= plats
->aggs
;
3398 aglat
= aglat
->next
)
3399 if (aglat
->is_single_const ())
3401 struct ipa_agg_value item
;
3402 item
.offset
= aglat
->offset
;
3403 item
.value
= aglat
->values
->value
;
3404 res
.safe_push (item
);
3409 /* Grow vectors in AVALS and fill them with information about values of
3410 parameters that are known to be independent of the context. Only calculate
3411 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3412 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3413 parameters will be stored in it.
3415 TODO: Also grow context independent value range vectors. */
3418 gather_context_independent_values (class ipa_node_params
*info
,
3419 ipa_auto_call_arg_values
*avals
,
3420 bool calculate_aggs
,
3421 int *removable_params_cost
)
3423 int i
, count
= ipa_get_param_count (info
);
3426 avals
->m_known_vals
.safe_grow_cleared (count
, true);
3427 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
3429 avals
->m_known_aggs
.safe_grow_cleared (count
, true);
3431 if (removable_params_cost
)
3432 *removable_params_cost
= 0;
3434 for (i
= 0; i
< count
; i
++)
3436 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3437 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3439 if (lat
->is_single_const ())
3441 ipcp_value
<tree
> *val
= lat
->values
;
3442 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3443 avals
->m_known_vals
[i
] = val
->value
;
3444 if (removable_params_cost
)
3445 *removable_params_cost
3446 += estimate_move_cost (TREE_TYPE (val
->value
), false);
3449 else if (removable_params_cost
3450 && !ipa_is_param_used (info
, i
))
3451 *removable_params_cost
3452 += ipa_get_param_move_cost (info
, i
);
3454 if (!ipa_is_param_used (info
, i
))
3457 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3458 /* Do not account known context as reason for cloning. We can see
3459 if it permits devirtualization. */
3460 if (ctxlat
->is_single_const ())
3461 avals
->m_known_contexts
[i
] = ctxlat
->values
->value
;
3465 vec
<ipa_agg_value
> agg_items
;
3466 struct ipa_agg_value_set
*agg
;
3468 agg_items
= context_independent_aggregate_values (plats
);
3469 agg
= &avals
->m_known_aggs
[i
];
3470 agg
->items
= agg_items
;
3471 agg
->by_ref
= plats
->aggs_by_ref
;
3472 ret
|= !agg_items
.is_empty ();
3479 /* Perform time and size measurement of NODE with the context given in AVALS,
3480 calculate the benefit compared to the node without specialization and store
3481 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3482 context-independent or unused removable parameters and EST_MOVE_COST, the
3483 estimated movement of the considered parameter. */
3486 perform_estimation_of_a_value (cgraph_node
*node
,
3487 ipa_auto_call_arg_values
*avals
,
3488 int removable_params_cost
, int est_move_cost
,
3489 ipcp_value_base
*val
)
3492 ipa_call_estimates estimates
;
3494 estimate_ipcp_clone_size_and_time (node
, avals
, &estimates
);
3496 /* Extern inline functions have no cloning local time benefits because they
3497 will be inlined anyway. The only reason to clone them is if it enables
3498 optimization in any of the functions they call. */
3499 if (DECL_EXTERNAL (node
->decl
) && DECL_DECLARED_INLINE_P (node
->decl
))
3502 time_benefit
= (estimates
.nonspecialized_time
- estimates
.time
)
3503 + (devirtualization_time_bonus (node
, avals
)
3504 + hint_time_bonus (node
, estimates
)
3505 + removable_params_cost
+ est_move_cost
);
3507 int size
= estimates
.size
;
3508 gcc_checking_assert (size
>=0);
3509 /* The inliner-heuristics based estimates may think that in certain
3510 contexts some functions do not have any size at all but we want
3511 all specializations to have at least a tiny cost, not least not to
3516 val
->local_time_benefit
= time_benefit
;
3517 val
->local_size_cost
= size
;
3520 /* Get the overall limit oof growth based on parameters extracted from growth.
3521 it does not really make sense to mix functions with different overall growth
3522 limits but it is possible and if it happens, we do not want to select one
3526 get_max_overall_size (cgraph_node
*node
)
3528 long max_new_size
= orig_overall_size
;
3529 long large_unit
= opt_for_fn (node
->decl
, param_ipa_cp_large_unit_insns
);
3530 if (max_new_size
< large_unit
)
3531 max_new_size
= large_unit
;
3532 int unit_growth
= opt_for_fn (node
->decl
, param_ipa_cp_unit_growth
);
3533 max_new_size
+= max_new_size
* unit_growth
/ 100 + 1;
3534 return max_new_size
;
3537 /* Iterate over known values of parameters of NODE and estimate the local
3538 effects in terms of time and size they have. */
3541 estimate_local_effects (struct cgraph_node
*node
)
3543 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3544 int i
, count
= ipa_get_param_count (info
);
3546 int removable_params_cost
;
3548 if (!count
|| !ipcp_versionable_function_p (node
))
3551 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3552 fprintf (dump_file
, "\nEstimating effects for %s.\n", node
->dump_name ());
3554 ipa_auto_call_arg_values avals
;
3555 always_const
= gather_context_independent_values (info
, &avals
, true,
3556 &removable_params_cost
);
3557 int devirt_bonus
= devirtualization_time_bonus (node
, &avals
);
3558 if (always_const
|| devirt_bonus
3559 || (removable_params_cost
&& node
->can_change_signature
))
3561 struct caller_statistics stats
;
3562 ipa_call_estimates estimates
;
3564 init_caller_stats (&stats
);
3565 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
3567 estimate_ipcp_clone_size_and_time (node
, &avals
, &estimates
);
3568 sreal time
= estimates
.nonspecialized_time
- estimates
.time
;
3569 time
+= devirt_bonus
;
3570 time
+= hint_time_bonus (node
, estimates
);
3571 time
+= removable_params_cost
;
3572 int size
= estimates
.size
- stats
.n_calls
* removable_params_cost
;
3575 fprintf (dump_file
, " - context independent values, size: %i, "
3576 "time_benefit: %f\n", size
, (time
).to_double ());
3578 if (size
<= 0 || node
->local
)
3580 info
->do_clone_for_all_contexts
= true;
3583 fprintf (dump_file
, " Decided to specialize for all "
3584 "known contexts, code not going to grow.\n");
3586 else if (good_cloning_opportunity_p (node
, time
, stats
.freq_sum
,
3587 stats
.count_sum
, size
))
3589 if (size
+ overall_size
<= get_max_overall_size (node
))
3591 info
->do_clone_for_all_contexts
= true;
3592 overall_size
+= size
;
3595 fprintf (dump_file
, " Decided to specialize for all "
3596 "known contexts, growth (to %li) deemed "
3597 "beneficial.\n", overall_size
);
3599 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3600 fprintf (dump_file
, " Not cloning for all contexts because "
3601 "maximum unit size would be reached with %li.\n",
3602 size
+ overall_size
);
3604 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3605 fprintf (dump_file
, " Not cloning for all contexts because "
3606 "!good_cloning_opportunity_p.\n");
3610 for (i
= 0; i
< count
; i
++)
3612 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3613 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3614 ipcp_value
<tree
> *val
;
3618 || avals
.m_known_vals
[i
])
3621 for (val
= lat
->values
; val
; val
= val
->next
)
3623 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3624 avals
.m_known_vals
[i
] = val
->value
;
3626 int emc
= estimate_move_cost (TREE_TYPE (val
->value
), true);
3627 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3630 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3632 fprintf (dump_file
, " - estimates for value ");
3633 print_ipcp_constant_value (dump_file
, val
->value
);
3634 fprintf (dump_file
, " for ");
3635 ipa_dump_param (dump_file
, info
, i
);
3636 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3637 val
->local_time_benefit
.to_double (),
3638 val
->local_size_cost
);
3641 avals
.m_known_vals
[i
] = NULL_TREE
;
3644 for (i
= 0; i
< count
; i
++)
3646 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3648 if (!plats
->virt_call
)
3651 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3652 ipcp_value
<ipa_polymorphic_call_context
> *val
;
3656 || !avals
.m_known_contexts
[i
].useless_p ())
3659 for (val
= ctxlat
->values
; val
; val
= val
->next
)
3661 avals
.m_known_contexts
[i
] = val
->value
;
3662 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3665 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3667 fprintf (dump_file
, " - estimates for polymorphic context ");
3668 print_ipcp_constant_value (dump_file
, val
->value
);
3669 fprintf (dump_file
, " for ");
3670 ipa_dump_param (dump_file
, info
, i
);
3671 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3672 val
->local_time_benefit
.to_double (),
3673 val
->local_size_cost
);
3676 avals
.m_known_contexts
[i
] = ipa_polymorphic_call_context ();
3679 for (i
= 0; i
< count
; i
++)
3681 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3683 if (plats
->aggs_bottom
|| !plats
->aggs
)
3686 ipa_agg_value_set
*agg
= &avals
.m_known_aggs
[i
];
3687 for (ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3689 ipcp_value
<tree
> *val
;
3690 if (aglat
->bottom
|| !aglat
->values
3691 /* If the following is true, the one value is in known_aggs. */
3692 || (!plats
->aggs_contain_variable
3693 && aglat
->is_single_const ()))
3696 for (val
= aglat
->values
; val
; val
= val
->next
)
3698 struct ipa_agg_value item
;
3700 item
.offset
= aglat
->offset
;
3701 item
.value
= val
->value
;
3702 agg
->items
.safe_push (item
);
3704 perform_estimation_of_a_value (node
, &avals
,
3705 removable_params_cost
, 0, val
);
3707 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3709 fprintf (dump_file
, " - estimates for value ");
3710 print_ipcp_constant_value (dump_file
, val
->value
);
3711 fprintf (dump_file
, " for ");
3712 ipa_dump_param (dump_file
, info
, i
);
3713 fprintf (dump_file
, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3714 "]: time_benefit: %g, size: %i\n",
3715 plats
->aggs_by_ref
? "ref " : "",
3717 val
->local_time_benefit
.to_double (),
3718 val
->local_size_cost
);
3728 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3729 topological sort of values. */
3731 template <typename valtype
>
3733 value_topo_info
<valtype
>::add_val (ipcp_value
<valtype
> *cur_val
)
3735 ipcp_value_source
<valtype
> *src
;
3741 cur_val
->dfs
= dfs_counter
;
3742 cur_val
->low_link
= dfs_counter
;
3744 cur_val
->topo_next
= stack
;
3746 cur_val
->on_stack
= true;
3748 for (src
= cur_val
->sources
; src
; src
= src
->next
)
3751 if (src
->val
->dfs
== 0)
3754 if (src
->val
->low_link
< cur_val
->low_link
)
3755 cur_val
->low_link
= src
->val
->low_link
;
3757 else if (src
->val
->on_stack
3758 && src
->val
->dfs
< cur_val
->low_link
)
3759 cur_val
->low_link
= src
->val
->dfs
;
3762 if (cur_val
->dfs
== cur_val
->low_link
)
3764 ipcp_value
<valtype
> *v
, *scc_list
= NULL
;
3769 stack
= v
->topo_next
;
3770 v
->on_stack
= false;
3771 v
->scc_no
= cur_val
->dfs
;
3773 v
->scc_next
= scc_list
;
3776 while (v
!= cur_val
);
3778 cur_val
->topo_next
= values_topo
;
3779 values_topo
= cur_val
;
3783 /* Add all values in lattices associated with NODE to the topological sort if
3784 they are not there yet. */
3787 add_all_node_vals_to_toposort (cgraph_node
*node
, ipa_topo_info
*topo
)
3789 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3790 int i
, count
= ipa_get_param_count (info
);
3792 for (i
= 0; i
< count
; i
++)
3794 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3795 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3796 struct ipcp_agg_lattice
*aglat
;
3800 ipcp_value
<tree
> *val
;
3801 for (val
= lat
->values
; val
; val
= val
->next
)
3802 topo
->constants
.add_val (val
);
3805 if (!plats
->aggs_bottom
)
3806 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3809 ipcp_value
<tree
> *val
;
3810 for (val
= aglat
->values
; val
; val
= val
->next
)
3811 topo
->constants
.add_val (val
);
3814 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3815 if (!ctxlat
->bottom
)
3817 ipcp_value
<ipa_polymorphic_call_context
> *ctxval
;
3818 for (ctxval
= ctxlat
->values
; ctxval
; ctxval
= ctxval
->next
)
3819 topo
->contexts
.add_val (ctxval
);
3824 /* One pass of constants propagation along the call graph edges, from callers
3825 to callees (requires topological ordering in TOPO), iterate over strongly
3826 connected components. */
3829 propagate_constants_topo (class ipa_topo_info
*topo
)
3833 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
3836 struct cgraph_node
*v
, *node
= topo
->order
[i
];
3837 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (node
);
3839 /* First, iteratively propagate within the strongly connected component
3840 until all lattices stabilize. */
3841 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3842 if (v
->has_gimple_body_p ())
3844 if (opt_for_fn (v
->decl
, flag_ipa_cp
)
3845 && opt_for_fn (v
->decl
, optimize
))
3846 push_node_to_stack (topo
, v
);
3847 /* When V is not optimized, we can not push it to stack, but
3848 still we need to set all its callees lattices to bottom. */
3851 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3852 propagate_constants_across_call (cs
);
3856 v
= pop_node_from_stack (topo
);
3859 struct cgraph_edge
*cs
;
3860 class ipa_node_params
*info
= NULL
;
3861 bool self_scc
= true;
3863 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3864 if (ipa_edge_within_scc (cs
))
3866 cgraph_node
*callee
= cs
->callee
->function_symbol ();
3873 info
= ipa_node_params_sum
->get (v
);
3874 info
->node_within_scc
= true;
3877 if (propagate_constants_across_call (cs
))
3878 push_node_to_stack (topo
, callee
);
3882 info
->node_is_self_scc
= self_scc
;
3884 v
= pop_node_from_stack (topo
);
3887 /* Afterwards, propagate along edges leading out of the SCC, calculates
3888 the local effects of the discovered constants and all valid values to
3889 their topological sort. */
3890 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3891 if (v
->has_gimple_body_p ()
3892 && opt_for_fn (v
->decl
, flag_ipa_cp
)
3893 && opt_for_fn (v
->decl
, optimize
))
3895 struct cgraph_edge
*cs
;
3897 estimate_local_effects (v
);
3898 add_all_node_vals_to_toposort (v
, topo
);
3899 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3900 if (!ipa_edge_within_scc (cs
))
3901 propagate_constants_across_call (cs
);
3903 cycle_nodes
.release ();
3907 /* Propagate the estimated effects of individual values along the topological
3908 from the dependent values to those they depend on. */
3910 template <typename valtype
>
3912 value_topo_info
<valtype
>::propagate_effects ()
3914 ipcp_value
<valtype
> *base
;
3915 hash_set
<ipcp_value
<valtype
> *> processed_srcvals
;
3917 for (base
= values_topo
; base
; base
= base
->topo_next
)
3919 ipcp_value_source
<valtype
> *src
;
3920 ipcp_value
<valtype
> *val
;
3922 HOST_WIDE_INT size
= 0;
3924 for (val
= base
; val
; val
= val
->scc_next
)
3926 time
= time
+ val
->local_time_benefit
+ val
->prop_time_benefit
;
3927 size
= size
+ val
->local_size_cost
+ val
->prop_size_cost
;
3930 for (val
= base
; val
; val
= val
->scc_next
)
3932 processed_srcvals
.empty ();
3933 for (src
= val
->sources
; src
; src
= src
->next
)
3935 && src
->cs
->maybe_hot_p ())
3937 if (!processed_srcvals
.add (src
->val
))
3939 HOST_WIDE_INT prop_size
= size
+ src
->val
->prop_size_cost
;
3940 if (prop_size
< INT_MAX
)
3941 src
->val
->prop_size_cost
= prop_size
;
3946 int special_factor
= 1;
3947 if (val
->same_scc (src
->val
))
3949 = opt_for_fn(src
->cs
->caller
->decl
,
3950 param_ipa_cp_recursive_freq_factor
);
3951 else if (val
->self_recursion_generated_p ()
3952 && (src
->cs
->callee
->function_symbol ()
3953 == src
->cs
->caller
))
3955 int max_recur_gen_depth
3956 = opt_for_fn(src
->cs
->caller
->decl
,
3957 param_ipa_cp_max_recursive_depth
);
3958 special_factor
= max_recur_gen_depth
3959 - val
->self_recursion_generated_level
+ 1;
3962 src
->val
->prop_time_benefit
3963 += time
* special_factor
* src
->cs
->sreal_frequency ();
3968 val
->prop_time_benefit
= time
;
3969 val
->prop_size_cost
= size
;
3973 val
->prop_time_benefit
= 0;
3974 val
->prop_size_cost
= 0;
3980 /* Callback for qsort to sort counts of all edges. */
3983 compare_edge_profile_counts (const void *a
, const void *b
)
3985 const profile_count
*cnt1
= (const profile_count
*) a
;
3986 const profile_count
*cnt2
= (const profile_count
*) b
;
3996 /* Propagate constants, polymorphic contexts and their effects from the
3997 summaries interprocedurally. */
4000 ipcp_propagate_stage (class ipa_topo_info
*topo
)
4002 struct cgraph_node
*node
;
4005 fprintf (dump_file
, "\n Propagating constants:\n\n");
4007 base_count
= profile_count::uninitialized ();
4009 bool compute_count_base
= false;
4010 unsigned base_count_pos_percent
= 0;
4011 FOR_EACH_DEFINED_FUNCTION (node
)
4013 if (node
->has_gimple_body_p ()
4014 && opt_for_fn (node
->decl
, flag_ipa_cp
)
4015 && opt_for_fn (node
->decl
, optimize
))
4017 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4018 determine_versionability (node
, info
);
4020 unsigned nlattices
= ipa_get_param_count (info
);
4021 void *chunk
= XCNEWVEC (class ipcp_param_lattices
, nlattices
);
4022 info
->lattices
= new (chunk
) ipcp_param_lattices
[nlattices
];
4023 initialize_node_lattices (node
);
4025 ipa_size_summary
*s
= ipa_size_summaries
->get (node
);
4026 if (node
->definition
&& !node
->alias
&& s
!= NULL
)
4027 overall_size
+= s
->self_size
;
4028 if (node
->count
.ipa ().initialized_p ())
4030 compute_count_base
= true;
4031 unsigned pos_percent
= opt_for_fn (node
->decl
,
4032 param_ipa_cp_profile_count_base
);
4033 base_count_pos_percent
= MAX (base_count_pos_percent
, pos_percent
);
4037 if (compute_count_base
)
4039 auto_vec
<profile_count
> all_edge_counts
;
4040 all_edge_counts
.reserve_exact (symtab
->edges_count
);
4041 FOR_EACH_DEFINED_FUNCTION (node
)
4042 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4044 profile_count count
= cs
->count
.ipa ();
4045 if (!(count
> profile_count::zero ()))
4048 enum availability avail
;
4050 = cs
->callee
->function_or_virtual_thunk_symbol (&avail
);
4051 ipa_node_params
*info
= ipa_node_params_sum
->get (tgt
);
4052 if (info
&& info
->versionable
)
4053 all_edge_counts
.quick_push (count
);
4056 if (!all_edge_counts
.is_empty ())
4058 gcc_assert (base_count_pos_percent
<= 100);
4059 all_edge_counts
.qsort (compare_edge_profile_counts
);
4061 unsigned base_count_pos
4062 = ((all_edge_counts
.length () * (base_count_pos_percent
)) / 100);
4063 base_count
= all_edge_counts
[base_count_pos
];
4067 fprintf (dump_file
, "\nSelected base_count from %u edges at "
4068 "position %u, arriving at: ", all_edge_counts
.length (),
4070 base_count
.dump (dump_file
);
4071 fprintf (dump_file
, "\n");
4075 fprintf (dump_file
, "\nNo candidates with non-zero call count found, "
4076 "continuing as if without profile feedback.\n");
4079 orig_overall_size
= overall_size
;
4082 fprintf (dump_file
, "\noverall_size: %li\n", overall_size
);
4084 propagate_constants_topo (topo
);
4086 ipcp_verify_propagated_values ();
4087 topo
->constants
.propagate_effects ();
4088 topo
->contexts
.propagate_effects ();
4092 fprintf (dump_file
, "\nIPA lattices after all propagation:\n");
4093 print_all_lattices (dump_file
, (dump_flags
& TDF_DETAILS
), true);
4097 /* Discover newly direct outgoing edges from NODE which is a new clone with
4098 known KNOWN_CSTS and make them direct. */
4101 ipcp_discover_new_direct_edges (struct cgraph_node
*node
,
4102 vec
<tree
> known_csts
,
4103 vec
<ipa_polymorphic_call_context
>
4105 struct ipa_agg_replacement_value
*aggvals
)
4107 struct cgraph_edge
*ie
, *next_ie
;
4110 for (ie
= node
->indirect_calls
; ie
; ie
= next_ie
)
4115 next_ie
= ie
->next_callee
;
4116 target
= ipa_get_indirect_edge_target_1 (ie
, known_csts
, known_contexts
,
4117 vNULL
, aggvals
, &speculative
);
4120 bool agg_contents
= ie
->indirect_info
->agg_contents
;
4121 bool polymorphic
= ie
->indirect_info
->polymorphic
;
4122 int param_index
= ie
->indirect_info
->param_index
;
4123 struct cgraph_edge
*cs
= ipa_make_edge_direct_to_target (ie
, target
,
4127 if (cs
&& !agg_contents
&& !polymorphic
)
4129 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4130 int c
= ipa_get_controlled_uses (info
, param_index
);
4131 if (c
!= IPA_UNDESCRIBED_USE
4132 && !ipa_get_param_load_dereferenced (info
, param_index
))
4134 struct ipa_ref
*to_del
;
4137 ipa_set_controlled_uses (info
, param_index
, c
);
4138 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4139 fprintf (dump_file
, " controlled uses count of param "
4140 "%i bumped down to %i\n", param_index
, c
);
4142 && (to_del
= node
->find_reference (cs
->callee
, NULL
, 0)))
4144 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4145 fprintf (dump_file
, " and even removing its "
4146 "cloning-created reference\n");
4147 to_del
->remove_reference ();
4153 /* Turning calls to direct calls will improve overall summary. */
4155 ipa_update_overall_fn_summary (node
);
4158 class edge_clone_summary
;
4159 static call_summary
<edge_clone_summary
*> *edge_clone_summaries
= NULL
;
4161 /* Edge clone summary. */
4163 class edge_clone_summary
4166 /* Default constructor. */
4167 edge_clone_summary (): prev_clone (NULL
), next_clone (NULL
) {}
4169 /* Default destructor. */
4170 ~edge_clone_summary ()
4173 edge_clone_summaries
->get (prev_clone
)->next_clone
= next_clone
;
4175 edge_clone_summaries
->get (next_clone
)->prev_clone
= prev_clone
;
4178 cgraph_edge
*prev_clone
;
4179 cgraph_edge
*next_clone
;
4182 class edge_clone_summary_t
:
4183 public call_summary
<edge_clone_summary
*>
4186 edge_clone_summary_t (symbol_table
*symtab
):
4187 call_summary
<edge_clone_summary
*> (symtab
)
4189 m_initialize_when_cloning
= true;
4192 virtual void duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4193 edge_clone_summary
*src_data
,
4194 edge_clone_summary
*dst_data
);
4197 /* Edge duplication hook. */
4200 edge_clone_summary_t::duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4201 edge_clone_summary
*src_data
,
4202 edge_clone_summary
*dst_data
)
4204 if (src_data
->next_clone
)
4205 edge_clone_summaries
->get (src_data
->next_clone
)->prev_clone
= dst_edge
;
4206 dst_data
->prev_clone
= src_edge
;
4207 dst_data
->next_clone
= src_data
->next_clone
;
4208 src_data
->next_clone
= dst_edge
;
4211 /* Return true is CS calls DEST or its clone for all contexts. When
4212 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4213 edges from/to an all-context clone. */
4216 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge
*cs
, cgraph_node
*dest
,
4217 bool allow_recursion_to_clone
)
4219 enum availability availability
;
4220 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
4222 if (availability
<= AVAIL_INTERPOSABLE
)
4226 if (!allow_recursion_to_clone
&& cs
->caller
== callee
)
4229 ipa_node_params
*info
= ipa_node_params_sum
->get (callee
);
4230 return info
->is_all_contexts_clone
&& info
->ipcp_orig_node
== dest
;
4233 /* Return true if edge CS does bring about the value described by SRC to
4234 DEST_VAL of node DEST or its clone for all contexts. */
4237 cgraph_edge_brings_value_p (cgraph_edge
*cs
, ipcp_value_source
<tree
> *src
,
4238 cgraph_node
*dest
, ipcp_value
<tree
> *dest_val
)
4240 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4242 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, !src
->val
)
4243 || caller_info
->node_dead
)
4249 if (caller_info
->ipcp_orig_node
)
4252 if (src
->offset
== -1)
4253 t
= caller_info
->known_csts
[src
->index
];
4255 t
= get_clone_agg_value (cs
->caller
, src
->offset
, src
->index
);
4256 return (t
!= NULL_TREE
4257 && values_equal_for_ipcp_p (src
->val
->value
, t
));
4261 if (src
->val
== dest_val
)
4264 struct ipcp_agg_lattice
*aglat
;
4265 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4267 if (src
->offset
== -1)
4268 return (plats
->itself
.is_single_const ()
4269 && values_equal_for_ipcp_p (src
->val
->value
,
4270 plats
->itself
.values
->value
));
4273 if (plats
->aggs_bottom
|| plats
->aggs_contain_variable
)
4275 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
4276 if (aglat
->offset
== src
->offset
)
4277 return (aglat
->is_single_const ()
4278 && values_equal_for_ipcp_p (src
->val
->value
,
4279 aglat
->values
->value
));
4285 /* Return true if edge CS does bring about the value described by SRC to
4286 DST_VAL of node DEST or its clone for all contexts. */
4289 cgraph_edge_brings_value_p (cgraph_edge
*cs
,
4290 ipcp_value_source
<ipa_polymorphic_call_context
> *src
,
4292 ipcp_value
<ipa_polymorphic_call_context
> *)
4294 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4296 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, true)
4297 || caller_info
->node_dead
)
4302 if (caller_info
->ipcp_orig_node
)
4303 return (caller_info
->known_contexts
.length () > (unsigned) src
->index
)
4304 && values_equal_for_ipcp_p (src
->val
->value
,
4305 caller_info
->known_contexts
[src
->index
]);
4307 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4309 return plats
->ctxlat
.is_single_const ()
4310 && values_equal_for_ipcp_p (src
->val
->value
,
4311 plats
->ctxlat
.values
->value
);
4314 /* Get the next clone in the linked list of clones of an edge. */
4316 static inline struct cgraph_edge
*
4317 get_next_cgraph_edge_clone (struct cgraph_edge
*cs
)
4319 edge_clone_summary
*s
= edge_clone_summaries
->get (cs
);
4320 return s
!= NULL
? s
->next_clone
: NULL
;
4323 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4324 of them is viable and hot, return true. In that case, for those that still
4325 hold, add their edge frequency and their number and cumulative profile
4326 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4327 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4329 template <typename valtype
>
4331 get_info_about_necessary_edges (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4332 sreal
*freq_sum
, int *caller_count
,
4333 profile_count
*rec_count_sum
,
4334 profile_count
*nonrec_count_sum
)
4336 ipcp_value_source
<valtype
> *src
;
4339 profile_count rec_cnt
= profile_count::zero ();
4340 profile_count nonrec_cnt
= profile_count::zero ();
4342 bool non_self_recursive
= false;
4344 for (src
= val
->sources
; src
; src
= src
->next
)
4346 struct cgraph_edge
*cs
= src
->cs
;
4349 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4352 freq
+= cs
->sreal_frequency ();
4353 hot
|= cs
->maybe_hot_p ();
4354 if (cs
->caller
!= dest
)
4356 non_self_recursive
= true;
4357 if (cs
->count
.ipa ().initialized_p ())
4358 rec_cnt
+= cs
->count
.ipa ();
4360 else if (cs
->count
.ipa ().initialized_p ())
4361 nonrec_cnt
+= cs
->count
.ipa ();
4363 cs
= get_next_cgraph_edge_clone (cs
);
4367 /* If the only edges bringing a value are self-recursive ones, do not bother
4369 if (!non_self_recursive
)
4373 *caller_count
= count
;
4374 *rec_count_sum
= rec_cnt
;
4375 *nonrec_count_sum
= nonrec_cnt
;
4377 if (!hot
&& ipa_node_params_sum
->get (dest
)->node_within_scc
)
4379 struct cgraph_edge
*cs
;
4381 /* Cold non-SCC source edge could trigger hot recursive execution of
4382 function. Consider the case as hot and rely on following cost model
4383 computation to further select right one. */
4384 for (cs
= dest
->callers
; cs
; cs
= cs
->next_caller
)
4385 if (cs
->caller
== dest
&& cs
->maybe_hot_p ())
4392 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4393 to let a non-self-recursive caller be the first element. Thus, we can
4394 simplify intersecting operations on values that arrive from all of these
4395 callers, especially when there exists self-recursive call. Return true if
4396 this kind of adjustment is possible. */
4399 adjust_callers_for_value_intersection (vec
<cgraph_edge
*> &callers
,
4402 for (unsigned i
= 0; i
< callers
.length (); i
++)
4404 cgraph_edge
*cs
= callers
[i
];
4406 if (cs
->caller
!= node
)
4410 callers
[i
] = callers
[0];
4419 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4420 is assumed their number is known and equal to CALLER_COUNT. */
4422 template <typename valtype
>
4423 static vec
<cgraph_edge
*>
4424 gather_edges_for_value (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4427 ipcp_value_source
<valtype
> *src
;
4428 vec
<cgraph_edge
*> ret
;
4430 ret
.create (caller_count
);
4431 for (src
= val
->sources
; src
; src
= src
->next
)
4433 struct cgraph_edge
*cs
= src
->cs
;
4436 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4437 ret
.quick_push (cs
);
4438 cs
= get_next_cgraph_edge_clone (cs
);
4442 if (caller_count
> 1)
4443 adjust_callers_for_value_intersection (ret
, dest
);
4448 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4449 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4450 should be set to true when the reference created for the constant should be
4451 a load one and not an address one because the corresponding parameter p is
4454 static struct ipa_replace_map
*
4455 get_replacement_map (class ipa_node_params
*info
, tree value
, int parm_num
,
4456 bool force_load_ref
)
4458 struct ipa_replace_map
*replace_map
;
4460 replace_map
= ggc_alloc
<ipa_replace_map
> ();
4463 fprintf (dump_file
, " replacing ");
4464 ipa_dump_param (dump_file
, info
, parm_num
);
4466 fprintf (dump_file
, " with const ");
4467 print_generic_expr (dump_file
, value
);
4470 fprintf (dump_file
, " - forcing load reference\n");
4472 fprintf (dump_file
, "\n");
4474 replace_map
->parm_num
= parm_num
;
4475 replace_map
->new_tree
= value
;
4476 replace_map
->force_load_ref
= force_load_ref
;
4480 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4481 one, otherwise it will be referred to as the original node. */
4484 dump_profile_updates (cgraph_node
*node
, bool spec
)
4487 fprintf (dump_file
, " setting count of the specialized node %s to ",
4488 node
->dump_name ());
4490 fprintf (dump_file
, " setting count of the original node %s to ",
4491 node
->dump_name ());
4493 node
->count
.dump (dump_file
);
4494 fprintf (dump_file
, "\n");
4495 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4497 fprintf (dump_file
, " edge to %s has count ",
4498 cs
->callee
->dump_name ());
4499 cs
->count
.dump (dump_file
);
4500 fprintf (dump_file
, "\n");
4504 /* With partial train run we do not want to assume that original's count is
4505 zero whenever we redurect all executed edges to clone. Simply drop profile
4506 to local one in this case. In eany case, return the new value. ORIG_NODE
4507 is the original node and its count has not been updaed yet. */
4510 lenient_count_portion_handling (profile_count remainder
, cgraph_node
*orig_node
)
4512 if (remainder
.ipa_p () && !remainder
.ipa ().nonzero_p ()
4513 && orig_node
->count
.ipa_p () && orig_node
->count
.ipa ().nonzero_p ()
4514 && opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4515 remainder
= remainder
.guessed_local ();
4520 /* Structure to sum counts coming from nodes other than the original node and
4523 struct gather_other_count_struct
4526 profile_count other_count
;
4529 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4530 counts that come from non-self-recursive calls.. */
4533 gather_count_of_non_rec_edges (cgraph_node
*node
, void *data
)
4535 gather_other_count_struct
*desc
= (gather_other_count_struct
*) data
;
4536 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4537 if (cs
->caller
!= desc
->orig
&& cs
->caller
->clone_of
!= desc
->orig
)
4538 desc
->other_count
+= cs
->count
.ipa ();
4542 /* Structure to help analyze if we need to boost counts of some clones of some
4543 non-recursive edges to match the new callee count. */
4545 struct desc_incoming_count_struct
4548 hash_set
<cgraph_edge
*> *processed_edges
;
4549 profile_count count
;
4550 unsigned unproc_orig_rec_edges
;
4553 /* Go over edges calling NODE and its thunks and gather information about
4554 incoming counts so that we know if we need to make any adjustments. */
4557 analyze_clone_icoming_counts (cgraph_node
*node
,
4558 desc_incoming_count_struct
*desc
)
4560 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4561 if (cs
->caller
->thunk
)
4563 analyze_clone_icoming_counts (cs
->caller
, desc
);
4568 if (cs
->count
.initialized_p ())
4569 desc
->count
+= cs
->count
.ipa ();
4570 if (!desc
->processed_edges
->contains (cs
)
4571 && cs
->caller
->clone_of
== desc
->orig
)
4572 desc
->unproc_orig_rec_edges
++;
4576 /* If caller edge counts of a clone created for a self-recursive arithmetic
4577 jump function must be adjusted because it is coming from a the "seed" clone
4578 for the first value and so has been excessively scaled back as if it was not
4579 a recursive call, adjust it so that the incoming counts of NODE match its
4580 count. NODE is the node or its thunk. */
4583 adjust_clone_incoming_counts (cgraph_node
*node
,
4584 desc_incoming_count_struct
*desc
)
4586 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4587 if (cs
->caller
->thunk
)
4589 adjust_clone_incoming_counts (cs
->caller
, desc
);
4590 profile_count sum
= profile_count::zero ();
4591 for (cgraph_edge
*e
= cs
->caller
->callers
; e
; e
= e
->next_caller
)
4592 if (e
->count
.initialized_p ())
4593 sum
+= e
->count
.ipa ();
4594 cs
->count
= cs
->count
.combine_with_ipa_count (sum
);
4596 else if (!desc
->processed_edges
->contains (cs
)
4597 && cs
->caller
->clone_of
== desc
->orig
)
4599 cs
->count
+= desc
->count
;
4602 fprintf (dump_file
, " Adjusted count of an incoming edge of "
4603 "a clone %s -> %s to ", cs
->caller
->dump_name (),
4604 cs
->callee
->dump_name ());
4605 cs
->count
.dump (dump_file
);
4606 fprintf (dump_file
, "\n");
4611 /* When ORIG_NODE has been cloned for values which have been generated fora
4612 self-recursive call as a result of an arithmetic pass-through
4613 jump-functions, adjust its count together with counts of all such clones in
4614 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4616 The function sums the counts of the original node and all its clones that
4617 cannot be attributed to a specific clone because it comes from a
4618 non-recursive edge. This sum is then evenly divided between the clones and
4619 on top of that each one gets all the counts which can be attributed directly
4623 update_counts_for_self_gen_clones (cgraph_node
*orig_node
,
4624 const vec
<cgraph_node
*> &self_gen_clones
)
4626 profile_count redist_sum
= orig_node
->count
.ipa ();
4627 if (!(redist_sum
> profile_count::zero ()))
4631 fprintf (dump_file
, " Updating profile of self recursive clone "
4634 gather_other_count_struct gocs
;
4635 gocs
.orig
= orig_node
;
4636 gocs
.other_count
= profile_count::zero ();
4638 auto_vec
<profile_count
, 8> other_edges_count
;
4639 for (cgraph_node
*n
: self_gen_clones
)
4641 gocs
.other_count
= profile_count::zero ();
4642 n
->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges
,
4644 other_edges_count
.safe_push (gocs
.other_count
);
4645 redist_sum
-= gocs
.other_count
;
4648 hash_set
<cgraph_edge
*> processed_edges
;
4650 for (cgraph_node
*n
: self_gen_clones
)
4652 profile_count orig_count
= n
->count
;
4653 profile_count new_count
4654 = (redist_sum
.apply_scale (1, self_gen_clones
.length ())
4655 + other_edges_count
[i
]);
4656 new_count
= lenient_count_portion_handling (new_count
, orig_node
);
4657 n
->count
= new_count
;
4658 profile_count::adjust_for_ipa_scaling (&new_count
, &orig_count
);
4659 for (cgraph_edge
*cs
= n
->callees
; cs
; cs
= cs
->next_callee
)
4661 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4662 processed_edges
.add (cs
);
4664 for (cgraph_edge
*cs
= n
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4665 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4670 /* There are still going to be edges to ORIG_NODE that have one or more
4671 clones coming from another node clone in SELF_GEN_CLONES and which we
4672 scaled by the same amount, which means that the total incoming sum of
4673 counts to ORIG_NODE will be too high, scale such edges back. */
4674 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4676 if (cs
->callee
->ultimate_alias_target () == orig_node
)
4679 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4680 if (e
->callee
->ultimate_alias_target () == orig_node
4681 && processed_edges
.contains (e
))
4684 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4685 if (e
->callee
->ultimate_alias_target () == orig_node
4686 && processed_edges
.contains (e
))
4687 e
->count
= e
->count
.apply_scale (1, den
);
4691 /* Edges from the seeds of the valus generated for arithmetic jump-functions
4692 along self-recursive edges are likely to have fairly low count and so
4693 edges from them to nodes in the self_gen_clones do not correspond to the
4694 artificially distributed count of the nodes, the total sum of incoming
4695 edges to some clones might be too low. Detect this situation and correct
4697 for (cgraph_node
*n
: self_gen_clones
)
4699 if (!(n
->count
.ipa () > profile_count::zero ()))
4702 desc_incoming_count_struct desc
;
4703 desc
.orig
= orig_node
;
4704 desc
.processed_edges
= &processed_edges
;
4705 desc
.count
= profile_count::zero ();
4706 desc
.unproc_orig_rec_edges
= 0;
4707 analyze_clone_icoming_counts (n
, &desc
);
4709 if (n
->count
.differs_from_p (desc
.count
))
4711 if (n
->count
> desc
.count
4712 && desc
.unproc_orig_rec_edges
> 0)
4714 desc
.count
= n
->count
- desc
.count
;
4716 = desc
.count
.apply_scale (1, desc
.unproc_orig_rec_edges
);
4717 adjust_clone_incoming_counts (n
, &desc
);
4721 " Unable to fix up incoming counts for %s.\n",
4727 for (cgraph_node
*n
: self_gen_clones
)
4728 dump_profile_updates (n
, n
!= orig_node
);
4732 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4733 their profile information to reflect this. This function should not be used
4734 for clones generated for arithmetic pass-through jump functions on a
4735 self-recursive call graph edge, that situation is handled by
4736 update_counts_for_self_gen_clones. */
4739 update_profiling_info (struct cgraph_node
*orig_node
,
4740 struct cgraph_node
*new_node
)
4742 struct caller_statistics stats
;
4743 profile_count new_sum
;
4744 profile_count remainder
, orig_node_count
= orig_node
->count
.ipa ();
4746 if (!(orig_node_count
> profile_count::zero ()))
4751 fprintf (dump_file
, " Updating profile from original count: ");
4752 orig_node_count
.dump (dump_file
);
4753 fprintf (dump_file
, "\n");
4756 init_caller_stats (&stats
, new_node
);
4757 new_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
4759 new_sum
= stats
.count_sum
;
4761 if (new_sum
> orig_node_count
)
4763 /* TODO: Perhaps this should be gcc_unreachable ()? */
4764 remainder
= profile_count::zero ().guessed_local ();
4766 else if (stats
.rec_count_sum
.nonzero_p ())
4768 int new_nonrec_calls
= stats
.n_nonrec_calls
;
4769 /* There are self-recursive edges which are likely to bring in the
4770 majority of calls but which we must divide in between the original and
4772 init_caller_stats (&stats
, orig_node
);
4773 orig_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
,
4775 int orig_nonrec_calls
= stats
.n_nonrec_calls
;
4776 profile_count orig_nonrec_call_count
= stats
.count_sum
;
4778 if (orig_node
->local
)
4780 if (!orig_nonrec_call_count
.nonzero_p ())
4783 fprintf (dump_file
, " The original is local and the only "
4784 "incoming edges from non-dead callers with nonzero "
4785 "counts are self-recursive, assuming it is cold.\n");
4786 /* The NEW_NODE count and counts of all its outgoing edges
4787 are still unmodified copies of ORIG_NODE's. Just clear
4788 the latter and bail out. */
4790 if (opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4791 zero
= profile_count::zero ().guessed_local ();
4793 zero
= profile_count::adjusted_zero ();
4794 orig_node
->count
= zero
;
4795 for (cgraph_edge
*cs
= orig_node
->callees
;
4797 cs
= cs
->next_callee
)
4799 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
4801 cs
= cs
->next_callee
)
4808 /* Let's behave as if there was another caller that accounts for all
4809 the calls that were either indirect or from other compilation
4811 orig_nonrec_calls
++;
4812 profile_count pretend_caller_count
4813 = (orig_node_count
- new_sum
- orig_nonrec_call_count
4814 - stats
.rec_count_sum
);
4815 orig_nonrec_call_count
+= pretend_caller_count
;
4818 /* Divide all "unexplained" counts roughly proportionally to sums of
4819 counts of non-recursive calls.
4821 We put rather arbitrary limits on how many counts we claim because the
4822 number of non-self-recursive incoming count is only a rough guideline
4823 and there are cases (such as mcf) where using it blindly just takes
4824 too many. And if lattices are considered in the opposite order we
4825 could also take too few. */
4826 profile_count unexp
= orig_node_count
- new_sum
- orig_nonrec_call_count
;
4828 int limit_den
= 2 * (orig_nonrec_calls
+ new_nonrec_calls
);
4829 profile_count new_part
4830 = MAX(MIN (unexp
.apply_scale (new_sum
,
4831 new_sum
+ orig_nonrec_call_count
),
4832 unexp
.apply_scale (limit_den
- 1, limit_den
)),
4833 unexp
.apply_scale (new_nonrec_calls
, limit_den
));
4836 fprintf (dump_file
, " Claiming ");
4837 new_part
.dump (dump_file
);
4838 fprintf (dump_file
, " of unexplained ");
4839 unexp
.dump (dump_file
);
4840 fprintf (dump_file
, " counts because of self-recursive "
4843 new_sum
+= new_part
;
4844 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
4848 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
4851 new_sum
= orig_node_count
.combine_with_ipa_count (new_sum
);
4852 new_node
->count
= new_sum
;
4853 orig_node
->count
= remainder
;
4855 profile_count orig_new_node_count
= orig_node_count
;
4856 profile_count::adjust_for_ipa_scaling (&new_sum
, &orig_new_node_count
);
4857 for (cgraph_edge
*cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
4858 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
4859 for (cgraph_edge
*cs
= new_node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4860 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
4862 profile_count::adjust_for_ipa_scaling (&remainder
, &orig_node_count
);
4863 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4864 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
4865 for (cgraph_edge
*cs
= orig_node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4866 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
4870 dump_profile_updates (new_node
, true);
4871 dump_profile_updates (orig_node
, false);
4875 /* Update the respective profile of specialized NEW_NODE and the original
4876 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4877 have been redirected to the specialized version. */
4880 update_specialized_profile (struct cgraph_node
*new_node
,
4881 struct cgraph_node
*orig_node
,
4882 profile_count redirected_sum
)
4884 struct cgraph_edge
*cs
;
4885 profile_count new_node_count
, orig_node_count
= orig_node
->count
;
4889 fprintf (dump_file
, " the sum of counts of redirected edges is ");
4890 redirected_sum
.dump (dump_file
);
4891 fprintf (dump_file
, "\n");
4893 if (!(orig_node_count
> profile_count::zero ()))
4896 gcc_assert (orig_node_count
>= redirected_sum
);
4898 new_node_count
= new_node
->count
;
4899 new_node
->count
+= redirected_sum
;
4900 orig_node
->count
-= redirected_sum
;
4902 for (cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
4903 cs
->count
+= cs
->count
.apply_scale (redirected_sum
, new_node_count
);
4905 for (cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4907 profile_count dec
= cs
->count
.apply_scale (redirected_sum
,
4914 dump_profile_updates (new_node
, true);
4915 dump_profile_updates (orig_node
, false);
4919 static void adjust_references_in_caller (cgraph_edge
*cs
,
4920 symtab_node
*symbol
, int index
);
4922 /* Simple structure to pass a symbol and index (with same meaning as parameters
4923 of adjust_references_in_caller) through a void* parameter of a
4924 call_for_symbol_thunks_and_aliases callback. */
4925 struct symbol_and_index_together
4927 symtab_node
*symbol
;
4931 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4932 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4934 adjust_refs_in_act_callers (struct cgraph_node
*node
, void *data
)
4936 symbol_and_index_together
*pack
= (symbol_and_index_together
*) data
;
4937 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4938 if (!cs
->caller
->thunk
)
4939 adjust_references_in_caller (cs
, pack
->symbol
, pack
->index
);
4943 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4944 variable which is only dereferenced and which is represented by SYMBOL. See
4945 if we can remove ADDR reference in callers assosiated witht the call. */
4948 adjust_references_in_caller (cgraph_edge
*cs
, symtab_node
*symbol
, int index
)
4950 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
4951 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, index
);
4952 if (jfunc
->type
== IPA_JF_CONST
)
4954 ipa_ref
*to_del
= cs
->caller
->find_reference (symbol
, cs
->call_stmt
,
4958 to_del
->remove_reference ();
4960 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
4961 cs
->caller
->dump_name (), symbol
->dump_name ());
4965 if (jfunc
->type
!= IPA_JF_PASS_THROUGH
4966 || ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
4969 int fidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
4970 cgraph_node
*caller
= cs
->caller
;
4971 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (caller
);
4972 /* TODO: This consistency check may be too big and not really
4973 that useful. Consider removing it. */
4975 if (caller_info
->ipcp_orig_node
)
4976 cst
= caller_info
->known_csts
[fidx
];
4979 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (caller_info
, fidx
);
4980 gcc_assert (lat
->is_single_const ());
4981 cst
= lat
->values
->value
;
4983 gcc_assert (TREE_CODE (cst
) == ADDR_EXPR
4984 && (symtab_node::get (get_base_address (TREE_OPERAND (cst
, 0)))
4987 int cuses
= ipa_get_controlled_uses (caller_info
, fidx
);
4988 if (cuses
== IPA_UNDESCRIBED_USE
)
4990 gcc_assert (cuses
> 0);
4992 ipa_set_controlled_uses (caller_info
, fidx
, cuses
);
4996 if (caller_info
->ipcp_orig_node
)
4998 /* Cloning machinery has created a reference here, we need to either
4999 remove it or change it to a read one. */
5000 ipa_ref
*to_del
= caller
->find_reference (symbol
, NULL
, 0);
5001 if (to_del
&& to_del
->use
== IPA_REF_ADDR
)
5003 to_del
->remove_reference ();
5005 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
5006 cs
->caller
->dump_name (), symbol
->dump_name ());
5007 if (ipa_get_param_load_dereferenced (caller_info
, fidx
))
5009 caller
->create_reference (symbol
, IPA_REF_LOAD
, NULL
);
5012 " ...and replaced it with LOAD one.\n");
5017 symbol_and_index_together pack
;
5018 pack
.symbol
= symbol
;
5020 if (caller
->can_change_signature
)
5021 caller
->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers
,
5026 /* Return true if we would like to remove a parameter from NODE when cloning it
5027 with KNOWN_CSTS scalar constants. */
5030 want_remove_some_param_p (cgraph_node
*node
, vec
<tree
> known_csts
)
5032 auto_vec
<bool, 16> surviving
;
5033 bool filled_vec
= false;
5034 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5035 int i
, count
= ipa_get_param_count (info
);
5037 for (i
= 0; i
< count
; i
++)
5039 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5044 clone_info
*info
= clone_info::get (node
);
5045 if (!info
|| !info
->param_adjustments
)
5047 info
->param_adjustments
->get_surviving_params (&surviving
);
5050 if (surviving
.length() < (unsigned) i
&& surviving
[i
])
5056 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5057 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5058 redirect all edges in CALLERS to it. */
5060 static struct cgraph_node
*
5061 create_specialized_node (struct cgraph_node
*node
,
5062 vec
<tree
> known_csts
,
5063 vec
<ipa_polymorphic_call_context
> known_contexts
,
5064 struct ipa_agg_replacement_value
*aggvals
,
5065 vec
<cgraph_edge
*> &callers
)
5067 ipa_node_params
*new_info
, *info
= ipa_node_params_sum
->get (node
);
5068 vec
<ipa_replace_map
*, va_gc
> *replace_trees
= NULL
;
5069 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
5070 struct ipa_agg_replacement_value
*av
;
5071 struct cgraph_node
*new_node
;
5072 int i
, count
= ipa_get_param_count (info
);
5073 clone_info
*cinfo
= clone_info::get (node
);
5074 ipa_param_adjustments
*old_adjustments
= cinfo
5075 ? cinfo
->param_adjustments
: NULL
;
5076 ipa_param_adjustments
*new_adjustments
;
5077 gcc_assert (!info
->ipcp_orig_node
);
5078 gcc_assert (node
->can_change_signature
5079 || !old_adjustments
);
5081 if (old_adjustments
)
5083 /* At the moment all IPA optimizations should use the number of
5084 parameters of the prevailing decl as the m_always_copy_start.
5085 Handling any other value would complicate the code below, so for the
5086 time bing let's only assert it is so. */
5087 gcc_assert (old_adjustments
->m_always_copy_start
== count
5088 || old_adjustments
->m_always_copy_start
< 0);
5089 int old_adj_count
= vec_safe_length (old_adjustments
->m_adj_params
);
5090 for (i
= 0; i
< old_adj_count
; i
++)
5092 ipa_adjusted_param
*old_adj
= &(*old_adjustments
->m_adj_params
)[i
];
5093 if (!node
->can_change_signature
5094 || old_adj
->op
!= IPA_PARAM_OP_COPY
5095 || (!known_csts
[old_adj
->base_index
]
5096 && ipa_is_param_used (info
, old_adj
->base_index
)))
5098 ipa_adjusted_param new_adj
= *old_adj
;
5100 new_adj
.prev_clone_adjustment
= true;
5101 new_adj
.prev_clone_index
= i
;
5102 vec_safe_push (new_params
, new_adj
);
5105 bool skip_return
= old_adjustments
->m_skip_return
;
5106 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5107 ipa_param_adjustments (new_params
, count
,
5110 else if (node
->can_change_signature
5111 && want_remove_some_param_p (node
, known_csts
))
5113 ipa_adjusted_param adj
;
5114 memset (&adj
, 0, sizeof (adj
));
5115 adj
.op
= IPA_PARAM_OP_COPY
;
5116 for (i
= 0; i
< count
; i
++)
5117 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5120 adj
.prev_clone_index
= i
;
5121 vec_safe_push (new_params
, adj
);
5123 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5124 ipa_param_adjustments (new_params
, count
, false));
5127 new_adjustments
= NULL
;
5129 auto_vec
<cgraph_edge
*, 2> self_recursive_calls
;
5130 for (i
= callers
.length () - 1; i
>= 0; i
--)
5132 cgraph_edge
*cs
= callers
[i
];
5133 if (cs
->caller
== node
)
5135 self_recursive_calls
.safe_push (cs
);
5136 callers
.unordered_remove (i
);
5139 replace_trees
= cinfo
? vec_safe_copy (cinfo
->tree_map
) : NULL
;
5140 for (i
= 0; i
< count
; i
++)
5142 tree t
= known_csts
[i
];
5146 gcc_checking_assert (TREE_CODE (t
) != TREE_BINFO
);
5148 bool load_ref
= false;
5149 symtab_node
*ref_symbol
;
5150 if (TREE_CODE (t
) == ADDR_EXPR
)
5152 tree base
= get_base_address (TREE_OPERAND (t
, 0));
5153 if (TREE_CODE (base
) == VAR_DECL
5154 && ipa_get_controlled_uses (info
, i
) == 0
5155 && ipa_get_param_load_dereferenced (info
, i
)
5156 && (ref_symbol
= symtab_node::get (base
)))
5159 if (node
->can_change_signature
)
5160 for (cgraph_edge
*caller
: callers
)
5161 adjust_references_in_caller (caller
, ref_symbol
, i
);
5165 ipa_replace_map
*replace_map
= get_replacement_map (info
, t
, i
, load_ref
);
5167 vec_safe_push (replace_trees
, replace_map
);
5170 unsigned &suffix_counter
= clone_num_suffixes
->get_or_insert (
5171 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5173 new_node
= node
->create_virtual_clone (callers
, replace_trees
,
5174 new_adjustments
, "constprop",
5178 bool have_self_recursive_calls
= !self_recursive_calls
.is_empty ();
5179 for (unsigned j
= 0; j
< self_recursive_calls
.length (); j
++)
5181 cgraph_edge
*cs
= get_next_cgraph_edge_clone (self_recursive_calls
[j
]);
5182 /* Cloned edges can disappear during cloning as speculation can be
5183 resolved, check that we have one and that it comes from the last
5185 if (cs
&& cs
->caller
== new_node
)
5186 cs
->redirect_callee_duplicating_thunks (new_node
);
5187 /* Any future code that would make more than one clone of an outgoing
5188 edge would confuse this mechanism, so let's check that does not
5190 gcc_checking_assert (!cs
5191 || !get_next_cgraph_edge_clone (cs
)
5192 || get_next_cgraph_edge_clone (cs
)->caller
!= new_node
);
5194 if (have_self_recursive_calls
)
5195 new_node
->expand_all_artificial_thunks ();
5197 ipa_set_node_agg_value_chain (new_node
, aggvals
);
5198 for (av
= aggvals
; av
; av
= av
->next
)
5199 new_node
->maybe_create_reference (av
->value
, NULL
);
5201 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5203 fprintf (dump_file
, " the new node is %s.\n", new_node
->dump_name ());
5204 if (known_contexts
.exists ())
5206 for (i
= 0; i
< count
; i
++)
5207 if (!known_contexts
[i
].useless_p ())
5209 fprintf (dump_file
, " known ctx %i is ", i
);
5210 known_contexts
[i
].dump (dump_file
);
5214 ipa_dump_agg_replacement_values (dump_file
, aggvals
);
5217 new_info
= ipa_node_params_sum
->get (new_node
);
5218 new_info
->ipcp_orig_node
= node
;
5219 new_node
->ipcp_clone
= true;
5220 new_info
->known_csts
= known_csts
;
5221 new_info
->known_contexts
= known_contexts
;
5223 ipcp_discover_new_direct_edges (new_node
, known_csts
, known_contexts
, aggvals
);
5228 /* Return true if JFUNC, which describes a i-th parameter of call CS, is a
5229 pass-through function to itself when the cgraph_node involved is not an
5230 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5231 no-operation pass-through. */
5234 self_recursive_pass_through_p (cgraph_edge
*cs
, ipa_jump_func
*jfunc
, int i
,
5237 enum availability availability
;
5238 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5239 && availability
> AVAIL_INTERPOSABLE
5240 && jfunc
->type
== IPA_JF_PASS_THROUGH
5241 && (!simple
|| ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5242 && ipa_get_jf_pass_through_formal_id (jfunc
) == i
5243 && ipa_node_params_sum
->get (cs
->caller
)
5244 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
5249 /* Return true if JFUNC, which describes a part of an aggregate represented or
5250 pointed to by the i-th parameter of call CS, is a pass-through function to
5251 itself when the cgraph_node involved is not an IPA-CP clone.. When
5252 SIMPLE is true, further check if JFUNC is a simple no-operation
5256 self_recursive_agg_pass_through_p (cgraph_edge
*cs
, ipa_agg_jf_item
*jfunc
,
5257 int i
, bool simple
= true)
5259 enum availability availability
;
5260 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5261 && availability
> AVAIL_INTERPOSABLE
5262 && jfunc
->jftype
== IPA_JF_LOAD_AGG
5263 && jfunc
->offset
== jfunc
->value
.load_agg
.offset
5264 && (!simple
|| jfunc
->value
.pass_through
.operation
== NOP_EXPR
)
5265 && jfunc
->value
.pass_through
.formal_id
== i
5266 && useless_type_conversion_p (jfunc
->value
.load_agg
.type
, jfunc
->type
)
5267 && ipa_node_params_sum
->get (cs
->caller
)
5268 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
5273 /* Given a NODE, and a subset of its CALLERS, try to populate blanks slots in
5274 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5277 find_more_scalar_values_for_callers_subset (struct cgraph_node
*node
,
5278 vec
<tree
> &known_csts
,
5279 const vec
<cgraph_edge
*> &callers
)
5281 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5282 int i
, count
= ipa_get_param_count (info
);
5284 for (i
= 0; i
< count
; i
++)
5286 struct cgraph_edge
*cs
;
5287 tree newval
= NULL_TREE
;
5290 tree type
= ipa_get_type (info
, i
);
5292 if (ipa_get_scalar_lat (info
, i
)->bottom
|| known_csts
[i
])
5295 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5297 struct ipa_jump_func
*jump_func
;
5300 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5302 || i
>= ipa_get_cs_argument_count (args
)
5304 && call_passes_through_thunk (cs
)))
5309 jump_func
= ipa_get_ith_jump_func (args
, i
);
5311 /* Besides simple pass-through jump function, arithmetic jump
5312 function could also introduce argument-direct-pass-through for
5313 self-feeding recursive call. For example,
5320 Given that i is 0, recursive propagation via (i & 1) also gets
5322 if (self_recursive_pass_through_p (cs
, jump_func
, i
, false))
5324 gcc_assert (newval
);
5325 t
= ipa_get_jf_arith_result (
5326 ipa_get_jf_pass_through_operation (jump_func
),
5328 ipa_get_jf_pass_through_operand (jump_func
),
5332 t
= ipa_value_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5336 && !values_equal_for_ipcp_p (t
, newval
))
5337 || (!first
&& !newval
))
5349 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5351 fprintf (dump_file
, " adding an extra known scalar value ");
5352 print_ipcp_constant_value (dump_file
, newval
);
5353 fprintf (dump_file
, " for ");
5354 ipa_dump_param (dump_file
, info
, i
);
5355 fprintf (dump_file
, "\n");
5358 known_csts
[i
] = newval
;
5363 /* Given a NODE and a subset of its CALLERS, try to populate plank slots in
5364 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5368 find_more_contexts_for_caller_subset (cgraph_node
*node
,
5369 vec
<ipa_polymorphic_call_context
>
5371 const vec
<cgraph_edge
*> &callers
)
5373 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5374 int i
, count
= ipa_get_param_count (info
);
5376 for (i
= 0; i
< count
; i
++)
5380 if (ipa_get_poly_ctx_lat (info
, i
)->bottom
5381 || (known_contexts
->exists ()
5382 && !(*known_contexts
)[i
].useless_p ()))
5385 ipa_polymorphic_call_context newval
;
5389 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5391 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5393 || i
>= ipa_get_cs_argument_count (args
))
5395 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
5396 ipa_polymorphic_call_context ctx
;
5397 ctx
= ipa_context_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5405 newval
.meet_with (ctx
);
5406 if (newval
.useless_p ())
5410 if (!newval
.useless_p ())
5412 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5414 fprintf (dump_file
, " adding an extra known polymorphic "
5416 print_ipcp_constant_value (dump_file
, newval
);
5417 fprintf (dump_file
, " for ");
5418 ipa_dump_param (dump_file
, info
, i
);
5419 fprintf (dump_file
, "\n");
5422 if (!known_contexts
->exists ())
5423 known_contexts
->safe_grow_cleared (ipa_get_param_count (info
),
5425 (*known_contexts
)[i
] = newval
;
5431 /* Go through PLATS and create a vector of values consisting of values and
5432 offsets (minus OFFSET) of lattices that contain only a single value. */
5434 static vec
<ipa_agg_value
>
5435 copy_plats_to_inter (class ipcp_param_lattices
*plats
, HOST_WIDE_INT offset
)
5437 vec
<ipa_agg_value
> res
= vNULL
;
5439 if (!plats
->aggs
|| plats
->aggs_contain_variable
|| plats
->aggs_bottom
)
5442 for (struct ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
5443 if (aglat
->is_single_const ())
5445 struct ipa_agg_value ti
;
5446 ti
.offset
= aglat
->offset
- offset
;
5447 ti
.value
= aglat
->values
->value
;
5453 /* Intersect all values in INTER with single value lattices in PLATS (while
5454 subtracting OFFSET). */
5457 intersect_with_plats (class ipcp_param_lattices
*plats
,
5458 vec
<ipa_agg_value
> *inter
,
5459 HOST_WIDE_INT offset
)
5461 struct ipcp_agg_lattice
*aglat
;
5462 struct ipa_agg_value
*item
;
5465 if (!plats
->aggs
|| plats
->aggs_contain_variable
|| plats
->aggs_bottom
)
5471 aglat
= plats
->aggs
;
5472 FOR_EACH_VEC_ELT (*inter
, k
, item
)
5479 if (aglat
->offset
- offset
> item
->offset
)
5481 if (aglat
->offset
- offset
== item
->offset
)
5483 if (aglat
->is_single_const ())
5485 tree value
= aglat
->values
->value
;
5487 if (values_equal_for_ipcp_p (item
->value
, value
))
5492 aglat
= aglat
->next
;
5495 item
->value
= NULL_TREE
;
5499 /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the
5500 vector result while subtracting OFFSET from the individual value offsets. */
5502 static vec
<ipa_agg_value
>
5503 agg_replacements_to_vector (struct cgraph_node
*node
, int index
,
5504 HOST_WIDE_INT offset
)
5506 struct ipa_agg_replacement_value
*av
;
5507 vec
<ipa_agg_value
> res
= vNULL
;
5509 for (av
= ipa_get_agg_replacements_for_node (node
); av
; av
= av
->next
)
5510 if (av
->index
== index
5511 && (av
->offset
- offset
) >= 0)
5513 struct ipa_agg_value item
;
5514 gcc_checking_assert (av
->value
);
5515 item
.offset
= av
->offset
- offset
;
5516 item
.value
= av
->value
;
5517 res
.safe_push (item
);
5523 /* Intersect all values in INTER with those that we have already scheduled to
5524 be replaced in parameter number INDEX of NODE, which is an IPA-CP clone
5525 (while subtracting OFFSET). */
5528 intersect_with_agg_replacements (struct cgraph_node
*node
, int index
,
5529 vec
<ipa_agg_value
> *inter
,
5530 HOST_WIDE_INT offset
)
5532 struct ipa_agg_replacement_value
*srcvals
;
5533 struct ipa_agg_value
*item
;
5536 srcvals
= ipa_get_agg_replacements_for_node (node
);
5543 FOR_EACH_VEC_ELT (*inter
, i
, item
)
5545 struct ipa_agg_replacement_value
*av
;
5549 for (av
= srcvals
; av
; av
= av
->next
)
5551 gcc_checking_assert (av
->value
);
5552 if (av
->index
== index
5553 && av
->offset
- offset
== item
->offset
)
5555 if (values_equal_for_ipcp_p (item
->value
, av
->value
))
5561 item
->value
= NULL_TREE
;
5565 /* Intersect values in INTER with aggregate values that come along edge CS to
5566 parameter number INDEX and return it. If INTER does not actually exist yet,
5567 copy all incoming values to it. If we determine we ended up with no values
5568 whatsoever, return a released vector. */
5570 static vec
<ipa_agg_value
>
5571 intersect_aggregates_with_edge (struct cgraph_edge
*cs
, int index
,
5572 vec
<ipa_agg_value
> inter
)
5574 struct ipa_jump_func
*jfunc
;
5575 jfunc
= ipa_get_ith_jump_func (ipa_edge_args_sum
->get (cs
), index
);
5576 if (jfunc
->type
== IPA_JF_PASS_THROUGH
5577 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5579 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5580 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
5582 if (caller_info
->ipcp_orig_node
)
5584 struct cgraph_node
*orig_node
= caller_info
->ipcp_orig_node
;
5585 class ipcp_param_lattices
*orig_plats
;
5586 ipa_node_params
*orig_info
= ipa_node_params_sum
->get (orig_node
);
5587 orig_plats
= ipa_get_parm_lattices (orig_info
, src_idx
);
5588 if (agg_pass_through_permissible_p (orig_plats
, jfunc
))
5590 if (!inter
.exists ())
5591 inter
= agg_replacements_to_vector (cs
->caller
, src_idx
, 0);
5593 intersect_with_agg_replacements (cs
->caller
, src_idx
,
5600 class ipcp_param_lattices
*src_plats
;
5601 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
5602 if (agg_pass_through_permissible_p (src_plats
, jfunc
))
5604 /* Currently we do not produce clobber aggregate jump
5605 functions, adjust when we do. */
5606 gcc_checking_assert (!jfunc
->agg
.items
);
5607 if (!inter
.exists ())
5608 inter
= copy_plats_to_inter (src_plats
, 0);
5610 intersect_with_plats (src_plats
, &inter
, 0);
5615 else if (jfunc
->type
== IPA_JF_ANCESTOR
5616 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
5618 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5619 int src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
5620 class ipcp_param_lattices
*src_plats
;
5621 HOST_WIDE_INT delta
= ipa_get_jf_ancestor_offset (jfunc
);
5623 if (caller_info
->ipcp_orig_node
)
5625 if (!inter
.exists ())
5626 inter
= agg_replacements_to_vector (cs
->caller
, src_idx
, delta
);
5628 intersect_with_agg_replacements (cs
->caller
, src_idx
, &inter
,
5633 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
5634 /* Currently we do not produce clobber aggregate jump
5635 functions, adjust when we do. */
5636 gcc_checking_assert (!src_plats
->aggs
|| !jfunc
->agg
.items
);
5637 if (!inter
.exists ())
5638 inter
= copy_plats_to_inter (src_plats
, delta
);
5640 intersect_with_plats (src_plats
, &inter
, delta
);
5645 if (jfunc
->agg
.items
)
5647 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5648 struct ipa_agg_value
*item
;
5651 if (!inter
.exists ())
5652 for (unsigned i
= 0; i
< jfunc
->agg
.items
->length (); i
++)
5654 struct ipa_agg_jf_item
*agg_item
= &(*jfunc
->agg
.items
)[i
];
5655 tree value
= ipa_agg_value_from_node (caller_info
, cs
->caller
,
5659 struct ipa_agg_value agg_value
;
5661 agg_value
.value
= value
;
5662 agg_value
.offset
= agg_item
->offset
;
5663 inter
.safe_push (agg_value
);
5667 FOR_EACH_VEC_ELT (inter
, k
, item
)
5675 while ((unsigned) l
< jfunc
->agg
.items
->length ())
5677 struct ipa_agg_jf_item
*ti
;
5678 ti
= &(*jfunc
->agg
.items
)[l
];
5679 if (ti
->offset
> item
->offset
)
5681 if (ti
->offset
== item
->offset
)
5685 /* Besides simple pass-through aggregate jump function,
5686 arithmetic aggregate jump function could also bring
5687 same aggregate value as parameter passed-in for
5688 self-feeding recursive call. For example,
5696 Given that *i is 0, recursive propagation via (*i & 1)
5698 if (self_recursive_agg_pass_through_p (cs
, ti
, index
,
5700 value
= ipa_get_jf_arith_result (
5701 ti
->value
.pass_through
.operation
,
5703 ti
->value
.pass_through
.operand
,
5706 value
= ipa_agg_value_from_node (caller_info
,
5709 if (value
&& values_equal_for_ipcp_p (item
->value
, value
))
5727 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5728 from all of them. */
/* NOTE(review): extraction fragment -- gaps in the embedded upstream line
   numbers (e.g. 5742->5748, 5791->5797) show that statements, braces and the
   final return are missing here; do not treat this as compilable code.  */
5730 static struct ipa_agg_replacement_value
*
5731 find_aggregate_values_for_callers_subset (struct cgraph_node
*node
,
5732 const vec
<cgraph_edge
*> &callers
)
/* Build a GC-allocated linked list of aggregate replacement values for NODE
   by intersecting, per parameter, the aggregate values brought by every
   caller edge in CALLERS.  */
5734 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5735 struct ipa_agg_replacement_value
*res
;
/* TAIL always points at the link through which the next result is chained.  */
5736 struct ipa_agg_replacement_value
**tail
= &res
;
5737 struct cgraph_edge
*cs
;
5738 int i
, j
, count
= ipa_get_param_count (dest_info
);
5740 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5742 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5748 int c
= ipa_get_cs_argument_count (args
);
/* For each formal parameter of NODE, intersect aggregate values across all
   caller edges; an empty intersection means no replacement for it.  */
5753 for (i
= 0; i
< count
; i
++)
5755 struct cgraph_edge
*cs
;
5756 vec
<ipa_agg_value
> inter
= vNULL
;
5757 struct ipa_agg_value
*item
;
5758 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (dest_info
, i
);
5761 /* Among other things, the following check should deal with all by_ref
5763 if (plats
->aggs_bottom
)
5766 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5768 struct ipa_jump_func
*jfunc
5769 = ipa_get_ith_jump_func (ipa_edge_args_sum
->get (cs
), i
);
/* Self-recursive pass-through edges carry the same aggregate back into the
   node, so they do not constrain the intersection.  */
5770 if (self_recursive_pass_through_p (cs
, jfunc
, i
)
5771 && (!plats
->aggs_by_ref
5772 || ipa_get_jf_pass_through_agg_preserved (jfunc
)))
5774 inter
= intersect_aggregates_with_edge (cs
, i
, inter
);
5776 if (!inter
.exists ())
/* Convert each surviving (offset, value) pair into an
   ipa_agg_replacement_value chained onto the result list.  */
5780 FOR_EACH_VEC_ELT (inter
, j
, item
)
5782 struct ipa_agg_replacement_value
*v
;
5787 v
= ggc_alloc
<ipa_agg_replacement_value
> ();
5789 v
->offset
= item
->offset
;
5790 v
->value
= item
->value
;
5791 v
->by_ref
= plats
->aggs_by_ref
;
5797 if (inter
.exists ())
5804 /* Determine whether CS also brings all scalar values that the NODE is
/* NOTE(review): extraction fragment -- the rest of this comment and several
   statements (e.g. declarations of i, val, t and the return statements) were
   dropped; gaps in the embedded numbering mark the missing lines.  */
5808 cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge
*cs
,
5809 struct cgraph_node
*node
)
/* For every parameter of NODE that has a known scalar constant, check that
   evaluating CS's jump function in the caller yields an equal value.  */
5811 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5812 int count
= ipa_get_param_count (dest_info
);
5813 class ipa_node_params
*caller_info
;
5814 class ipa_edge_args
*args
;
5817 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5818 args
= ipa_edge_args_sum
->get (cs
);
5819 for (i
= 0; i
< count
; i
++)
5821 struct ipa_jump_func
*jump_func
;
/* VAL is the scalar constant the specialized NODE relies on for param I.  */
5824 val
= dest_info
->known_csts
[i
];
/* An edge with fewer actual arguments than NODE has formals cannot supply
   the required value.  */
5828 if (i
>= ipa_get_cs_argument_count (args
))
5830 jump_func
= ipa_get_ith_jump_func (args
, i
);
5831 t
= ipa_value_from_jfunc (caller_info
, jump_func
,
5832 ipa_get_type (dest_info
, i
));
5833 if (!t
|| !values_equal_for_ipcp_p (val
, t
))
5839 /* Determine whether CS also brings all aggregate values that NODE is
/* NOTE(review): extraction fragment -- declarations (i, j, count, ec), braces
   and return statements are missing (numbering gaps 5845->5848, 5868->5876,
   etc.).  */
5842 cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge
*cs
,
5843 struct cgraph_node
*node
)
/* Verify that the aggregate replacement values NODE (an IPA-CP clone) was
   specialized for are all also brought in by edge CS.  */
5845 struct ipa_agg_replacement_value
*aggval
;
5848 aggval
= ipa_get_agg_replacements_for_node (node
);
5852 ipa_node_params
*clone_node_info
= ipa_node_params_sum
->get (node
);
5853 count
= ipa_get_param_count (clone_node_info
);
5854 ec
= ipa_get_cs_argument_count (ipa_edge_args_sum
->get (cs
));
/* NOTE(review): the three loops below declare iterator AV but the conditions
   dereference AGGVAL (the unchanged list head) -- likely should be
   av->index / av->offset / av->value; confirm against upstream ipa-cp.cc
   before relying on this fragment.  */
5856 for (struct ipa_agg_replacement_value
*av
= aggval
; av
; av
= av
->next
)
5857 if (aggval
->index
>= ec
)
5860 ipa_node_params
*orig_node_info
5861 = ipa_node_params_sum
->get (clone_node_info
->ipcp_orig_node
);
5863 for (i
= 0; i
< count
; i
++)
5865 class ipcp_param_lattices
*plats
;
/* INTERESTING tracks whether any replacement concerns parameter I.  */
5866 bool interesting
= false;
5867 for (struct ipa_agg_replacement_value
*av
= aggval
; av
; av
= av
->next
)
5868 if (aggval
->index
== i
)
/* Lattices are looked up on the original (pre-cloning) node.  */
5876 plats
= ipa_get_parm_lattices (orig_node_info
, aggval
->index
);
5877 if (plats
->aggs_bottom
)
5880 vec
<ipa_agg_value
> values
= intersect_aggregates_with_edge (cs
, i
, vNULL
);
5881 if (!values
.exists ())
/* Every replacement for parameter I must have an equal counterpart among
   the values the edge actually brings.  */
5884 for (struct ipa_agg_replacement_value
*av
= aggval
; av
; av
= av
->next
)
5885 if (aggval
->index
== i
)
5887 struct ipa_agg_value
*item
;
5890 FOR_EACH_VEC_ELT (values
, j
, item
)
5892 && item
->offset
== av
->offset
5893 && values_equal_for_ipcp_p (item
->value
, av
->value
))
5909 /* Given an original NODE and a VAL for which we have already created a
5910 specialized clone, look whether there are incoming edges that still lead
5911 into the old node but now also bring the requested value and also conform to
5912 all other criteria such that they can be redirected the special node.
5913 This function can therefore redirect the final edge in a SCC. */
/* NOTE(review): extraction fragment -- the return type, some braces and a
   dump_file guard before the fprintf are missing (numbering gaps).  */
5915 template <typename valtype
>
5917 perhaps_add_new_callers (cgraph_node
*node
, ipcp_value
<valtype
> *val
)
5919 ipcp_value_source
<valtype
> *src
;
/* Accumulates the IPA profile count of all redirected edges so the clone's
   profile can be adjusted once at the end.  */
5920 profile_count redirected_sum
= profile_count::zero ();
/* Walk every source edge of VAL; for each, also walk its clone chain via
   get_next_cgraph_edge_clone.  */
5922 for (src
= val
->sources
; src
; src
= src
->next
)
5924 struct cgraph_edge
*cs
= src
->cs
;
/* Redirect only when the edge brings VAL itself plus every scalar and
   aggregate value the specialized clone was built for.  */
5927 if (cgraph_edge_brings_value_p (cs
, src
, node
, val
)
5928 && cgraph_edge_brings_all_scalars_for_node (cs
, val
->spec_node
)
5929 && cgraph_edge_brings_all_agg_vals_for_node (cs
, val
->spec_node
))
5932 fprintf (dump_file
, " - adding an extra caller %s of %s\n",
5933 cs
->caller
->dump_name (),
5934 val
->spec_node
->dump_name ());
5936 cs
->redirect_callee_duplicating_thunks (val
->spec_node
);
5937 val
->spec_node
->expand_all_artificial_thunks ();
5938 if (cs
->count
.ipa ().initialized_p ())
5939 redirected_sum
= redirected_sum
+ cs
->count
.ipa ();
5941 cs
= get_next_cgraph_edge_clone (cs
);
5945 if (redirected_sum
.nonzero_p ())
5946 update_specialized_profile (val
->spec_node
, node
, redirected_sum
);
5949 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
/* NOTE(review): extraction fragment -- the return type, the index variable
   declaration and the return statements are missing.  */
5952 known_contexts_useful_p (vec
<ipa_polymorphic_call_context
> known_contexts
)
5954 ipa_polymorphic_call_context
*ctx
;
/* Scan for the first context that is not useless; an empty vector trivially
   has none.  */
5957 FOR_EACH_VEC_ELT (known_contexts
, i
, ctx
)
5958 if (!ctx
->useless_p ())
5963 /* Return a copy of KNOWN_CONTEXTS if it is not empty, otherwise return vNULL. */
/* NOTE(review): the original comment said KNOWN_CSTS, but the code clearly
   copies the polymorphic-context vector; fixed here.  Extraction fragment --
   the fallback "return vNULL;" branch is missing.  */
5965 static vec
<ipa_polymorphic_call_context
>
5966 copy_useful_known_contexts (const vec
<ipa_polymorphic_call_context
> &known_contexts
)
5968 if (known_contexts_useful_p (known_contexts
))
5969 return known_contexts
.copy ();
5974 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5975 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
/* NOTE(review): the tail of the comment above was dropped by the extraction,
   as was the function's return type line.  */
5979 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
5980 vec
<tree
> *known_csts
,
5981 vec
<ipa_polymorphic_call_context
> *known_contexts
,
5982 ipcp_value
<tree
> *val
, int index
)
/* Scalar overload: duplicate both vectors, then overwrite slot INDEX of the
   scalar copy with VAL's value.  */
5984 *known_csts
= avals
->m_known_vals
.copy ();
5985 *known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
5986 (*known_csts
)[index
] = val
->value
;
5989 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5990 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
/* NOTE(review): extraction fragment -- the comment tail, return type and the
   "int index" parameter line are missing (numbering gap 5997->6000).  */
5994 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
5995 vec
<tree
> *known_csts
,
5996 vec
<ipa_polymorphic_call_context
> *known_contexts
,
5997 ipcp_value
<ipa_polymorphic_call_context
> *val
,
/* Context overload: duplicate both vectors, then overwrite slot INDEX of the
   context copy with VAL's value.  */
6000 *known_csts
= avals
->m_known_vals
.copy ();
6001 *known_contexts
= avals
->m_known_contexts
.copy ();
6002 (*known_contexts
)[index
] = val
->value
;
6005 /* Return true if OFFSET indicates this was not an aggregate value or there is
6006 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
/* NOTE(review): extraction fragment -- comment tail, return type, the early
   "offset == -1" check and the loop/return structure are partly missing.  */
6010 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value
*aggvals
,
6011 int index
, HOST_WIDE_INT offset
, tree value
)
/* Walk the AGGVALS list looking for an entry matching INDEX and OFFSET whose
   value is equal (in the IPCP sense) to VALUE.  */
6018 if (aggvals
->index
== index
6019 && aggvals
->offset
== offset
6020 && values_equal_for_ipcp_p (aggvals
->value
, value
))
6022 aggvals
= aggvals
->next
;
6027 /* Return true if offset is minus one because source of a polymorphic context
6028 cannot be an aggregate value. */
/* Overload for polymorphic contexts used by the checking assert in
   decide_about_value; the replacement list and index are irrelevant, only
   OFFSET == -1 (i.e. "not an aggregate slot") matters.  NOTE(review): the
   return type line was dropped by the extraction.  */
6031 ipcp_val_agg_replacement_ok_p (ipa_agg_replacement_value
*,
6032 int , HOST_WIDE_INT offset
,
6033 ipa_polymorphic_call_context
)
6035 return offset
== -1;
6038 /* Decide whether to create a special version of NODE for value VAL of
6039 parameter at the given INDEX. If OFFSET is -1, the value is for the
6040 parameter itself, otherwise it is stored at the given OFFSET of the
6041 parameter. AVALS describes the other already known values. SELF_GEN_CLONES
6042 is a vector which contains clones created for self-recursive calls with an
6043 arithmetic pass-through jump function. */
/* NOTE(review): extraction fragment -- the return type, several local
   declarations (freq_sum, caller_count, ret), most braces/returns and some
   argument lists are missing (see the numbering gaps, e.g. 6133->6136).  */
6045 template <typename valtype
>
6047 decide_about_value (struct cgraph_node
*node
, int index
, HOST_WIDE_INT offset
,
6048 ipcp_value
<valtype
> *val
, ipa_auto_call_arg_values
*avals
,
6049 vec
<cgraph_node
*> *self_gen_clones
)
6051 struct ipa_agg_replacement_value
*aggvals
;
6054 profile_count count_sum
, rec_count_sum
;
6055 vec
<cgraph_edge
*> callers
;
/* If a clone already exists for VAL, only try to redirect further callers.  */
6059 perhaps_add_new_callers (node
, val
);
/* Respect the unit-growth cap before considering a new clone.  */
6062 else if (val
->local_size_cost
+ overall_size
> get_max_overall_size (node
))
6064 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6065 fprintf (dump_file
, " Ignoring candidate value because "
6066 "maximum unit size would be reached with %li.\n",
6067 val
->local_size_cost
+ overall_size
);
6070 else if (!get_info_about_necessary_edges (val
, node
, &freq_sum
, &caller_count
,
6071 &rec_count_sum
, &count_sum
))
6074 if (!dbg_cnt (ipa_cp_values
))
6077 if (val
->self_recursion_generated_p ())
6079 /* The edge counts in this case might not have been adjusted yet.
6080 Nevertheless, even if they were it would be only a guesswork which we
6081 can do now. The recursive part of the counts can be derived from the
6082 count of the original node anyway. */
6083 if (node
->count
.ipa ().nonzero_p ())
/* Split the node's count evenly among all self-generated clones plus one.  */
6085 unsigned dem
= self_gen_clones
->length () + 1;
6086 rec_count_sum
= node
->count
.ipa ().apply_scale (1, dem
);
6089 rec_count_sum
= profile_count::zero ();
6092 /* get_info_about_necessary_edges only sums up ipa counts. */
6093 count_sum
+= rec_count_sum
;
6095 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6097 fprintf (dump_file
, " - considering value ");
6098 print_ipcp_constant_value (dump_file
, val
->value
);
6099 fprintf (dump_file
, " for ");
6100 ipa_dump_param (dump_file
, ipa_node_params_sum
->get (node
), index
);
6102 fprintf (dump_file
, ", offset: " HOST_WIDE_INT_PRINT_DEC
, offset
);
6103 fprintf (dump_file
, " (caller_count: %i)\n", caller_count
);
/* Clone only if either the local or the propagated time benefit justifies
   the corresponding size cost.  */
6106 if (!good_cloning_opportunity_p (node
, val
->local_time_benefit
,
6107 freq_sum
, count_sum
,
6108 val
->local_size_cost
)
6109 && !good_cloning_opportunity_p (node
, val
->prop_time_benefit
,
6110 freq_sum
, count_sum
, val
->prop_size_cost
))
6114 fprintf (dump_file
, " Creating a specialized node of %s.\n",
6115 node
->dump_name ());
6117 vec
<tree
> known_csts
;
6118 vec
<ipa_polymorphic_call_context
> known_contexts
;
6120 callers
= gather_edges_for_value (val
, node
, caller_count
);
/* For an aggregate value (offset != -1, per the missing branch) the scalar
   vectors are plain copies; otherwise VAL is spliced in at INDEX.  */
6122 copy_known_vectors_add_val (avals
, &known_csts
, &known_contexts
, val
, index
);
6125 known_csts
= avals
->m_known_vals
.copy ();
6126 known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
6128 find_more_scalar_values_for_callers_subset (node
, known_csts
, callers
);
6129 find_more_contexts_for_caller_subset (node
, &known_contexts
, callers
);
6130 aggvals
= find_aggregate_values_for_callers_subset (node
, callers
);
6131 gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals
, index
,
6132 offset
, val
->value
));
6133 val
->spec_node
= create_specialized_node (node
, known_csts
, known_contexts
,
6136 if (val
->self_recursion_generated_p ())
6137 self_gen_clones
->safe_push (val
->spec_node
);
6139 update_profiling_info (node
, val
->spec_node
);
6142 overall_size
+= val
->local_size_cost
;
6143 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6144 fprintf (dump_file
, " overall size reached %li\n",
6147 /* TODO: If for some lattice there is only one other known value
6148 left, make a special node for it too. */
6153 /* Decide whether and what specialized clones of NODE should be created. */
/* NOTE(review): extraction fragment -- the return type, the `ret' variable,
   many braces, conditions (e.g. the lattice-constant guard before 6180) and
   trailing arguments of the decide_about_value calls are missing; the
   numbering gaps mark every dropped line.  */
6156 decide_whether_version_node (struct cgraph_node
*node
)
6158 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6159 int i
, count
= ipa_get_param_count (info
);
6165 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6166 fprintf (dump_file
, "\nEvaluating opportunities for %s.\n",
6167 node
->dump_name ());
6169 auto_vec
<cgraph_node
*, 9> self_gen_clones
;
6170 ipa_auto_call_arg_values avals
;
6171 gather_context_independent_values (info
, &avals
, false, NULL
);
/* Phase 1: per-parameter decisions -- scalar values, aggregate values and
   polymorphic contexts, each through decide_about_value.  */
6173 for (i
= 0; i
< count
;i
++)
6175 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6176 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
6177 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
6180 && !avals
.m_known_vals
[i
])
6182 ipcp_value
<tree
> *val
;
6183 for (val
= lat
->values
; val
; val
= val
->next
)
6185 /* If some values generated for self-recursive calls with
6186 arithmetic jump functions fall outside of the known
6187 value_range for the parameter, we can skip them. VR interface
6188 supports this only for integers now. */
6189 if (TREE_CODE (val
->value
) == INTEGER_CST
6190 && !plats
->m_value_range
.bottom_p ()
6191 && !plats
->m_value_range
.m_vr
.contains_p (val
->value
))
6193 /* This can happen also if a constant present in the source
6194 code falls outside of the range of parameter's type, so we
6196 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6198 fprintf (dump_file
, " - skipping%s value ",
6199 val
->self_recursion_generated_p ()
6200 ? " self_recursion_generated" : "");
6201 print_ipcp_constant_value (dump_file
, val
->value
);
6202 fprintf (dump_file
, " because it is outside known "
6207 ret
|= decide_about_value (node
, i
, -1, val
, &avals
,
/* Aggregate lattices: one decision per (offset, value) pair.  */
6212 if (!plats
->aggs_bottom
)
6214 struct ipcp_agg_lattice
*aglat
;
6215 ipcp_value
<tree
> *val
;
6216 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
6217 if (!aglat
->bottom
&& aglat
->values
6218 /* If the following is false, the one value has been considered
6219 for cloning for all contexts. */
6220 && (plats
->aggs_contain_variable
6221 || !aglat
->is_single_const ()))
6222 for (val
= aglat
->values
; val
; val
= val
->next
)
6223 ret
|= decide_about_value (node
, i
, aglat
->offset
, val
, &avals
,
/* Polymorphic contexts: only when no single useful context is already
   known for this parameter.  */
6228 && avals
.m_known_contexts
[i
].useless_p ())
6230 ipcp_value
<ipa_polymorphic_call_context
> *val
;
6231 for (val
= ctxlat
->values
; val
; val
= val
->next
)
6232 ret
|= decide_about_value (node
, i
, -1, val
, &avals
,
/* Phase 2: fix up profile counts of self-generated recursive clones.  */
6237 if (!self_gen_clones
.is_empty ())
6239 self_gen_clones
.safe_push (node
);
6240 update_counts_for_self_gen_clones (node
, self_gen_clones
);
/* Phase 3: possibly create one clone specialized for all known contexts.  */
6243 if (info
->do_clone_for_all_contexts
)
6245 if (!dbg_cnt (ipa_cp_values
))
6247 info
->do_clone_for_all_contexts
= false;
6251 struct cgraph_node
*clone
;
6252 auto_vec
<cgraph_edge
*> callers
= node
->collect_callers ();
/* Iterate backwards so unordered_remove does not skip elements.  */
6254 for (int i
= callers
.length () - 1; i
>= 0; i
--)
6256 cgraph_edge
*cs
= callers
[i
];
6257 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
6259 if (caller_info
&& caller_info
->node_dead
)
6260 callers
.unordered_remove (i
);
6263 if (!adjust_callers_for_value_intersection (callers
, node
))
6265 /* If node is not called by anyone, or all its caller edges are
6266 self-recursive, the node is not really in use, no need to do
6268 info
->do_clone_for_all_contexts
= false;
6273 fprintf (dump_file
, " - Creating a specialized node of %s "
6274 "for all known contexts.\n", node
->dump_name ());
6276 vec
<tree
> known_csts
= avals
.m_known_vals
.copy ();
6277 vec
<ipa_polymorphic_call_context
> known_contexts
6278 = copy_useful_known_contexts (avals
.m_known_contexts
);
6279 find_more_scalar_values_for_callers_subset (node
, known_csts
, callers
);
6280 find_more_contexts_for_caller_subset (node
, &known_contexts
, callers
);
6281 ipa_agg_replacement_value
*aggvals
6282 = find_aggregate_values_for_callers_subset (node
, callers
);
6284 if (!known_contexts_useful_p (known_contexts
))
6286 known_contexts
.release ();
6287 known_contexts
= vNULL
;
6289 clone
= create_specialized_node (node
, known_csts
, known_contexts
,
6291 info
->do_clone_for_all_contexts
= false;
6292 ipa_node_params_sum
->get (clone
)->is_all_contexts_clone
= true;
6299 /* Transitively mark all callees of NODE within the same SCC as not dead. */
/* NOTE(review): extraction fragment -- the return type and braces are
   missing.  Recursion depth is bounded by the SCC size.  */
6302 spread_undeadness (struct cgraph_node
*node
)
6304 struct cgraph_edge
*cs
;
/* Only edges that stay inside NODE's SCC propagate liveness.  */
6306 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
6307 if (ipa_edge_within_scc (cs
))
6309 struct cgraph_node
*callee
;
6310 class ipa_node_params
*info
;
/* Resolve aliases/thunks to the underlying function symbol.  */
6312 callee
= cs
->callee
->function_symbol (NULL
);
6313 info
= ipa_node_params_sum
->get (callee
);
6315 if (info
&& info
->node_dead
)
6317 info
->node_dead
= 0;
6318 spread_undeadness (callee
);
6323 /* Return true if NODE has a caller from outside of its SCC that is not
6324 dead. Worker callback for cgraph_for_node_and_aliases. */
/* NOTE(review): extraction fragment -- the return type and the return
   statements inside the branches are missing.  */
6327 has_undead_caller_from_outside_scc_p (struct cgraph_node
*node
,
6328 void *data ATTRIBUTE_UNUSED
)
6330 struct cgraph_edge
*cs
;
6332 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
/* A thunk caller is looked through recursively via its own thunks and
   aliases.  */
6333 if (cs
->caller
->thunk
6334 && cs
->caller
->call_for_symbol_thunks_and_aliases
6335 (has_undead_caller_from_outside_scc_p
, NULL
, true))
6337 else if (!ipa_edge_within_scc (cs
))
6339 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
6340 if (!caller_info
/* Unoptimized callers are like dead ones. */
6341 || !caller_info
->node_dead
)
6348 /* Identify nodes within the same SCC as NODE which are no longer needed
6349 because of new clones and will be removed as unreachable. */
/* NOTE(review): extraction fragment -- the return type, a local-function
   guard in the first loop, braces and the dump_name argument of the final
   fprintf are missing.  Three passes over the SCC cycle list hanging off
   v->aux: (1) tentatively mark nodes dead, (2) spread liveness from
   still-live nodes, (3) optionally dump the final dead set.  */
6352 identify_dead_nodes (struct cgraph_node
*node
)
6354 struct cgraph_node
*v
;
6355 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6358 ipa_node_params
*info
= ipa_node_params_sum
->get (v
);
6360 && !v
->call_for_symbol_thunks_and_aliases
6361 (has_undead_caller_from_outside_scc_p
, NULL
, true))
6362 info
->node_dead
= 1;
6365 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6367 ipa_node_params
*info
= ipa_node_params_sum
->get (v
);
6368 if (info
&& !info
->node_dead
)
6369 spread_undeadness (v
);
6372 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6374 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6375 if (ipa_node_params_sum
->get (v
)
6376 && ipa_node_params_sum
->get (v
)->node_dead
)
6377 fprintf (dump_file
, " Marking node as dead: %s.\n",
6382 /* The decision stage. Iterate over the topological order of call graph nodes
6383 TOPO and make specialized clones if deemed beneficial. */
/* NOTE(review): extraction fragment -- the return type, the declaration of
   `i', a dump_file guard, and the loop that re-runs while `iterate' is set
   are partly missing (numbering gaps 6396->6400, 6405->6410).  */
6386 ipcp_decision_stage (class ipa_topo_info
*topo
)
6391 fprintf (dump_file
, "\nIPA decision stage:\n\n");
/* Walk nodes in reverse topological order so callees are decided before
   their callers.  */
6393 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
6395 struct cgraph_node
*node
= topo
->order
[i
];
6396 bool change
= false, iterate
= true;
6400 struct cgraph_node
*v
;
/* Consider every versionable node in NODE's SCC cycle.  */
6402 for (v
= node
; v
; v
= ((struct ipa_dfs_info
*) v
->aux
)->next_cycle
)
6403 if (v
->has_gimple_body_p ()
6404 && ipcp_versionable_function_p (v
))
6405 iterate
|= decide_whether_version_node (v
);
6410 identify_dead_nodes (node
);
6414 /* Look up all the bits information that we have discovered and copy it over
6415 to the transformation summary. */
/* NOTE(review): extraction fragment -- the function's return type, the
   cgraph_node declaration, dump_file guards, `continue' statements, the
   declaration of `jfbits' and the else-branch pushing a NULL entry are
   missing (see numbering gaps, e.g. 6466->6469, 6471->6477).  */
6418 ipcp_store_bits_results (void)
6422 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6424 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6425 bool dumped_sth
= false;
6426 bool found_useful_result
= false;
/* Skip functions where bitwise CP is disabled or which have no summary.  */
6428 if (!opt_for_fn (node
->decl
, flag_ipa_bit_cp
) || !info
)
6431 fprintf (dump_file
, "Not considering %s for ipa bitwise propagation "
6432 "; -fipa-bit-cp: disabled.\n",
6433 node
->dump_name ());
/* For IPA-CP clones the lattices live on the original node.  */
6437 if (info
->ipcp_orig_node
)
6438 info
= ipa_node_params_sum
->get (info
->ipcp_orig_node
);
6439 if (!info
->lattices
)
6440 /* Newly expanded artificial thunks do not have lattices. */
/* First scan: bail out early when no parameter has a constant bits
   lattice.  */
6443 unsigned count
= ipa_get_param_count (info
);
6444 for (unsigned i
= 0; i
< count
; i
++)
6446 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6447 if (plats
->bits_lattice
.constant_p ())
6449 found_useful_result
= true;
6454 if (!found_useful_result
)
6457 ipcp_transformation_initialize ();
6458 ipcp_transformation
*ts
= ipcp_transformation_sum
->get_create (node
);
6459 vec_safe_reserve_exact (ts
->bits
, count
);
/* Second scan: record one entry per parameter in the transformation
   summary, dumping what was propagated when requested.  */
6461 for (unsigned i
= 0; i
< count
; i
++)
6463 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6466 if (plats
->bits_lattice
.constant_p ())
6469 = ipa_get_ipa_bits_for_value (plats
->bits_lattice
.get_value (),
6470 plats
->bits_lattice
.get_mask ());
6471 if (!dbg_cnt (ipa_cp_bits
))
6477 ts
->bits
->quick_push (jfbits
);
6478 if (!dump_file
|| !jfbits
)
6482 fprintf (dump_file
, "Propagated bits info for function %s:\n",
6483 node
->dump_name ());
6486 fprintf (dump_file
, " param %i: value = ", i
);
6487 print_hex (jfbits
->value
, dump_file
);
6488 fprintf (dump_file
, ", mask = ");
6489 print_hex (jfbits
->mask
, dump_file
);
6490 fprintf (dump_file
, "\n");
6495 /* Look up all VR information that we have discovered and copy it over
6496 to the transformation summary. */
/* NOTE(review): extraction fragment -- return type, node declaration,
   dump_file guards, `continue's, the declaration of the local `vr' record
   and several braces are missing.  Same two-scan structure as
   ipcp_store_bits_results, but for value ranges.  */
6499 ipcp_store_vr_results (void)
6503 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6505 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6506 bool found_useful_result
= false;
6508 if (!info
|| !opt_for_fn (node
->decl
, flag_ipa_vrp
))
/* NOTE(review): the runtime string below says "-fipa-ipa-vrp" -- looks like
   a doubled prefix for the -fipa-vrp option; confirm against upstream
   before changing (runtime text, not altered here).  */
6511 fprintf (dump_file
, "Not considering %s for VR discovery "
6512 "and propagate; -fipa-ipa-vrp: disabled.\n",
6513 node
->dump_name ());
/* For IPA-CP clones the lattices live on the original node.  */
6517 if (info
->ipcp_orig_node
)
6518 info
= ipa_node_params_sum
->get (info
->ipcp_orig_node
);
6519 if (!info
->lattices
)
6520 /* Newly expanded artificial thunks do not have lattices. */
/* First scan: is there any parameter with a non-trivial value range?  */
6523 unsigned count
= ipa_get_param_count (info
);
6524 for (unsigned i
= 0; i
< count
; i
++)
6526 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6527 if (!plats
->m_value_range
.bottom_p ()
6528 && !plats
->m_value_range
.top_p ())
6530 found_useful_result
= true;
6534 if (!found_useful_result
)
6537 ipcp_transformation_initialize ();
6538 ipcp_transformation
*ts
= ipcp_transformation_sum
->get_create (node
);
6539 vec_safe_reserve_exact (ts
->m_vr
, count
);
/* Second scan: push either the discovered range or a VARYING placeholder
   for every parameter, keeping m_vr indexed by parameter.  */
6541 for (unsigned i
= 0; i
< count
; i
++)
6543 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6546 if (!plats
->m_value_range
.bottom_p ()
6547 && !plats
->m_value_range
.top_p ()
6548 && dbg_cnt (ipa_cp_vr
))
6551 vr
.type
= plats
->m_value_range
.m_vr
.kind ();
6552 vr
.min
= wi::to_wide (plats
->m_value_range
.m_vr
.min ());
6553 vr
.max
= wi::to_wide (plats
->m_value_range
.m_vr
.max ());
6558 vr
.type
= VR_VARYING
;
6559 vr
.min
= vr
.max
= wi::zero (INT_TYPE_SIZE
);
6561 ts
->m_vr
->quick_push (vr
);
6566 /* The IPCP driver. */
/* NOTE(review): extraction fragment -- the function signature itself
   (presumably `static unsigned int ipcp_driver (void)'), dump_file guards
   and the return statement were dropped; only the body survives.  Orchestrates
   the whole pass: setup, propagation, decisions, storing results, teardown.  */
6571 class ipa_topo_info topo
;
/* Lazily created summary tracking cloned edges across the pass.  */
6573 if (edge_clone_summaries
== NULL
)
6574 edge_clone_summaries
= new edge_clone_summary_t (symtab
);
6576 ipa_check_create_node_params ();
6577 ipa_check_create_edge_args ();
/* Maps clone-name prefixes to the next numeric suffix to use.  */
6578 clone_num_suffixes
= new hash_map
<const char *, unsigned>;
6582 fprintf (dump_file
, "\nIPA structures before propagation:\n");
6583 if (dump_flags
& TDF_DETAILS
)
6584 ipa_print_all_params (dump_file
);
6585 ipa_print_all_jump_functions (dump_file
);
6588 /* Topological sort. */
6589 build_toporder_info (&topo
);
6590 /* Do the interprocedural propagation. */
6591 ipcp_propagate_stage (&topo
);
6592 /* Decide what constant propagation and cloning should be performed. */
6593 ipcp_decision_stage (&topo
);
6594 /* Store results of bits propagation. */
6595 ipcp_store_bits_results ();
6596 /* Store results of value range propagation. */
6597 ipcp_store_vr_results ();
6599 /* Free all IPCP structures. */
6600 delete clone_num_suffixes
;
6601 free_toporder_info (&topo
);
6602 delete edge_clone_summaries
;
6603 edge_clone_summaries
= NULL
;
6604 ipa_free_all_structures_after_ipa_cp ();
6606 fprintf (dump_file
, "\nIPA constant propagation end\n");
6610 /* Initialization and computation of IPCP data structures. This is the initial
6611 intraprocedural analysis of functions, which gathers information to be
6612 propagated later on. */
/* NOTE(review): extraction fragment -- the return type, a dump_file guard
   and braces are missing.  Registers cgraph hooks, then analyzes each
   function with a gimple body to build jump functions.  */
6615 ipcp_generate_summary (void)
6617 struct cgraph_node
*node
;
6620 fprintf (dump_file
, "\nIPA constant propagation start:\n");
6621 ipa_register_cgraph_hooks ();
6623 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6624 ipa_analyze_node (node
);
/* Static pass descriptor for the IPA-CP pass.  NOTE(review): extraction
   fragment -- the pass name string and the closing brace/semicolon were
   dropped (numbering gaps 6631->6633, after 6639).  */
6629 const pass_data pass_data_ipa_cp
=
6631 IPA_PASS
, /* type */
6633 OPTGROUP_NONE
, /* optinfo_flags */
6634 TV_IPA_CONSTANT_PROP
, /* tv_id */
6635 0, /* properties_required */
6636 0, /* properties_provided */
6637 0, /* properties_destroyed */
6638 0, /* todo_flags_start */
6639 ( TODO_dump_symtab
| TODO_remove_functions
), /* todo_flags_finish */
/* Pass wrapper class hooking IPA-CP into the pass manager: summary
   generation, LTO transformation-summary streaming, gating and execution.
   NOTE(review): extraction fragment -- the opening brace, `public:' label
   and some braces are missing.  */
6642 class pass_ipa_cp
: public ipa_opt_pass_d
6645 pass_ipa_cp (gcc::context
*ctxt
)
6646 : ipa_opt_pass_d (pass_data_ipa_cp
, ctxt
,
6647 ipcp_generate_summary
, /* generate_summary */
6648 NULL
, /* write_summary */
6649 NULL
, /* read_summary */
6650 ipcp_write_transformation_summaries
, /*
6651 write_optimization_summary */
6652 ipcp_read_transformation_summaries
, /*
6653 read_optimization_summary */
6654 NULL
, /* stmt_fixup */
6655 0, /* function_transform_todo_flags_start */
6656 ipcp_transform_function
, /* function_transform */
6657 NULL
) /* variable_transform */
6660 /* opt_pass methods: */
6661 virtual bool gate (function
*)
6663 /* FIXME: We should remove the optimize check after we ensure we never run
6664 IPA passes when not optimizing. */
6665 return (flag_ipa_cp
&& optimize
) || in_lto_p
;
6668 virtual unsigned int execute (function
*) { return ipcp_driver (); }
6670 }; // class pass_ipa_cp
/* Factory used by the pass manager to instantiate the IPA-CP pass.
   NOTE(review): the return-type line (`ipa_opt_pass_d *') was dropped by the
   extraction.  */
6675 make_pass_ipa_cp (gcc::context
*ctxt
)
6677 return new pass_ipa_cp (ctxt
);
6680 /* Reset all state within ipa-cp.cc so that we can rerun the compiler
6681 within the same process. For use by toplev::finalize. */
/* NOTE(review): extraction fragment -- the return type, the reset of at
   least one more global (numbering gap 6686->6688) and the closing brace
   are missing.  */
6684 ipa_cp_cc_finalize (void)
6686 base_count
= profile_count::uninitialized ();
6688 orig_overall_size
= 0;
6689 ipcp_free_transformation_sum ();