1 /* Interprocedural constant propagation
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
4 Contributed by Razya Ladelsky <RAZYA@il.ibm.com> and Martin Jambor
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 /* Interprocedural constant propagation (IPA-CP).
25 The goal of this transformation is to
27 1) discover functions which are always invoked with some arguments with the
28 same known constant values and modify the functions so that the
29 subsequent optimizations can take advantage of the knowledge, and
31 2) partial specialization - create specialized versions of functions
32 transformed in this way if some parameters are known constants only in
33 certain contexts but the estimated tradeoff between speedup and cost size
36 The algorithm also propagates types and attempts to perform type based
37 devirtualization. Types are propagated much like constants.
39 The algorithm basically consists of three stages. In the first, functions
40 are analyzed one at a time and jump functions are constructed for all known
41 call-sites. In the second phase, the pass propagates information from the
42 jump functions across the call to reveal what values are available at what
43 call sites, performs estimations of effects of known values on functions and
44 their callees, and finally decides what specialized extra versions should be
45 created. In the third, the special versions materialize and appropriate
48 The algorithm used is to a certain extent based on "Interprocedural Constant
49 Propagation", by David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon,
50 Comp86, pg 152-161 and "A Methodology for Procedure Cloning" by Keith D
51 Cooper, Mary W. Hall, and Ken Kennedy.
54 First stage - intraprocedural analysis
55 =======================================
57 This phase computes jump_function and modification flags.
59 A jump function for a call-site represents the values passed as an actual
60 arguments of a given call-site. In principle, there are three types of
63 Pass through - the caller's formal parameter is passed as an actual
64 argument, plus an operation on it can be performed.
65 Constant - a constant is passed as an actual argument.
66 Unknown - neither of the above.
68 All jump function types are described in detail in ipa-prop.h, together with
69 the data structures that represent them and methods of accessing them.
71 ipcp_generate_summary() is the main function of the first stage.
73 Second stage - interprocedural analysis
74 ========================================
76 This stage is itself divided into two phases. In the first, we propagate
77 known values over the call graph, in the second, we make cloning decisions.
78 It uses a different algorithm than the original Callahan's paper.
80 First, we traverse the functions topologically from callers to callees and,
81 for each strongly connected component (SCC), we propagate constants
82 according to previously computed jump functions. We also record what known
83 values depend on other known values and estimate local effects. Finally, we
84 propagate cumulative information about these effects from dependent values
85 to those on which they depend.
87 Second, we again traverse the call graph in the same topological order and
88 make clones for functions which we know are called with the same values in
89 all contexts and decide about extra specialized clones of functions just for
90 some contexts - these decisions are based on both local estimates and
91 cumulative estimates propagated from callees.
93 ipcp_propagate_stage() and ipcp_decision_stage() together constitute the
96 Third phase - materialization of clones, call statement updates.
97 ============================================
99 This stage is currently performed by call graph code (mainly in cgraphunit.cc
100 and tree-inline.cc) according to instructions inserted to the call graph by
103 #define INCLUDE_ALGORITHM
106 #include "coretypes.h"
109 #include "gimple-expr.h"
113 #include "alloc-pool.h"
114 #include "tree-pass.h"
116 #include "diagnostic.h"
117 #include "fold-const.h"
118 #include "gimple-iterator.h"
119 #include "gimple-fold.h"
120 #include "symbol-summary.h"
121 #include "tree-vrp.h"
123 #include "ipa-prop.h"
124 #include "tree-pretty-print.h"
125 #include "tree-inline.h"
126 #include "ipa-fnsummary.h"
127 #include "ipa-utils.h"
128 #include "tree-ssa-ccp.h"
129 #include "stringpool.h"
132 #include "symtab-clones.h"
133 #include "gimple-range.h"
136 /* Allocation pools for values and their sources in ipa-cp. */
138 object_allocator
<ipcp_value
<tree
> > ipcp_cst_values_pool
139 ("IPA-CP constant values");
141 object_allocator
<ipcp_value
<ipa_polymorphic_call_context
> >
142 ipcp_poly_ctx_values_pool ("IPA-CP polymorphic contexts");
144 object_allocator
<ipcp_value_source
<tree
> > ipcp_sources_pool
145 ("IPA-CP value sources");
147 object_allocator
<ipcp_agg_lattice
> ipcp_agg_lattice_pool
148 ("IPA_CP aggregate lattices");
150 /* Base count to use in heuristics when using profile feedback. */
152 static profile_count base_count
;
154 /* Original overall size of the program. */
156 static long overall_size
, orig_overall_size
;
158 /* Node name to unique clone suffix number map. */
159 static hash_map
<const char *, unsigned> *clone_num_suffixes
;
161 /* Return the param lattices structure corresponding to the Ith formal
162 parameter of the function described by INFO. */
163 static inline class ipcp_param_lattices
*
164 ipa_get_parm_lattices (class ipa_node_params
*info
, int i
)
166 gcc_assert (i
>= 0 && i
< ipa_get_param_count (info
));
167 gcc_checking_assert (!info
->ipcp_orig_node
);
168 return &(info
->lattices
[i
]);
171 /* Return the lattice corresponding to the scalar value of the Ith formal
172 parameter of the function described by INFO. */
173 static inline ipcp_lattice
<tree
> *
174 ipa_get_scalar_lat (class ipa_node_params
*info
, int i
)
176 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
177 return &plats
->itself
;
180 /* Return the lattice corresponding to the scalar value of the Ith formal
181 parameter of the function described by INFO. */
182 static inline ipcp_lattice
<ipa_polymorphic_call_context
> *
183 ipa_get_poly_ctx_lat (class ipa_node_params
*info
, int i
)
185 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
186 return &plats
->ctxlat
;
189 /* Return whether LAT is a lattice with a single constant and without an
192 template <typename valtype
>
194 ipcp_lattice
<valtype
>::is_single_const ()
196 if (bottom
|| contains_variable
|| values_count
!= 1)
202 /* Return true iff X and Y should be considered equal values by IPA-CP. */
205 values_equal_for_ipcp_p (tree x
, tree y
)
207 gcc_checking_assert (x
!= NULL_TREE
&& y
!= NULL_TREE
);
212 if (TREE_CODE (x
) == ADDR_EXPR
213 && TREE_CODE (y
) == ADDR_EXPR
214 && (TREE_CODE (TREE_OPERAND (x
, 0)) == CONST_DECL
215 || (TREE_CODE (TREE_OPERAND (x
, 0)) == VAR_DECL
216 && DECL_IN_CONSTANT_POOL (TREE_OPERAND (x
, 0))))
217 && (TREE_CODE (TREE_OPERAND (y
, 0)) == CONST_DECL
218 || (TREE_CODE (TREE_OPERAND (y
, 0)) == VAR_DECL
219 && DECL_IN_CONSTANT_POOL (TREE_OPERAND (y
, 0)))))
220 return TREE_OPERAND (x
, 0) == TREE_OPERAND (y
, 0)
221 || operand_equal_p (DECL_INITIAL (TREE_OPERAND (x
, 0)),
222 DECL_INITIAL (TREE_OPERAND (y
, 0)), 0);
224 return operand_equal_p (x
, y
, 0);
227 /* Print V which is extracted from a value in a lattice to F. */
230 print_ipcp_constant_value (FILE * f
, ipa_polymorphic_call_context v
)
235 /* Print a lattice LAT to F. */
237 template <typename valtype
>
239 ipcp_lattice
<valtype
>::print (FILE * f
, bool dump_sources
, bool dump_benefits
)
241 ipcp_value
<valtype
> *val
;
246 fprintf (f
, "BOTTOM\n");
250 if (!values_count
&& !contains_variable
)
252 fprintf (f
, "TOP\n");
256 if (contains_variable
)
258 fprintf (f
, "VARIABLE");
264 for (val
= values
; val
; val
= val
->next
)
266 if (dump_benefits
&& prev
)
268 else if (!dump_benefits
&& prev
)
273 print_ipcp_constant_value (f
, val
->value
);
277 ipcp_value_source
<valtype
> *s
;
279 if (val
->self_recursion_generated_p ())
280 fprintf (f
, " [self_gen(%i), from:",
281 val
->self_recursion_generated_level
);
283 fprintf (f
, " [scc: %i, from:", val
->scc_no
);
284 for (s
= val
->sources
; s
; s
= s
->next
)
285 fprintf (f
, " %i(%f)", s
->cs
->caller
->order
,
286 s
->cs
->sreal_frequency ().to_double ());
291 fprintf (f
, " [loc_time: %g, loc_size: %i, "
292 "prop_time: %g, prop_size: %i]\n",
293 val
->local_time_benefit
.to_double (), val
->local_size_cost
,
294 val
->prop_time_benefit
.to_double (), val
->prop_size_cost
);
301 ipcp_bits_lattice::print (FILE *f
)
304 fprintf (f
, " Bits unknown (TOP)\n");
305 else if (bottom_p ())
306 fprintf (f
, " Bits unusable (BOTTOM)\n");
309 fprintf (f
, " Bits: value = "); print_hex (get_value (), f
);
310 fprintf (f
, ", mask = "); print_hex (get_mask (), f
);
315 /* Print value range lattice to F. */
318 ipcp_vr_lattice::print (FILE * f
)
323 /* Print all ipcp_lattices of all functions to F. */
326 print_all_lattices (FILE * f
, bool dump_sources
, bool dump_benefits
)
328 struct cgraph_node
*node
;
331 fprintf (f
, "\nLattices:\n");
332 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
334 class ipa_node_params
*info
;
336 info
= ipa_node_params_sum
->get (node
);
337 /* Skip unoptimized functions and constprop clones since we don't make
338 lattices for them. */
339 if (!info
|| info
->ipcp_orig_node
)
341 fprintf (f
, " Node: %s:\n", node
->dump_name ());
342 count
= ipa_get_param_count (info
);
343 for (i
= 0; i
< count
; i
++)
345 struct ipcp_agg_lattice
*aglat
;
346 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
347 fprintf (f
, " param [%d]: ", i
);
348 plats
->itself
.print (f
, dump_sources
, dump_benefits
);
349 fprintf (f
, " ctxs: ");
350 plats
->ctxlat
.print (f
, dump_sources
, dump_benefits
);
351 plats
->bits_lattice
.print (f
);
353 plats
->m_value_range
.print (f
);
355 if (plats
->virt_call
)
356 fprintf (f
, " virt_call flag set\n");
358 if (plats
->aggs_bottom
)
360 fprintf (f
, " AGGS BOTTOM\n");
363 if (plats
->aggs_contain_variable
)
364 fprintf (f
, " AGGS VARIABLE\n");
365 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
367 fprintf (f
, " %soffset " HOST_WIDE_INT_PRINT_DEC
": ",
368 plats
->aggs_by_ref
? "ref " : "", aglat
->offset
);
369 aglat
->print (f
, dump_sources
, dump_benefits
);
375 /* Determine whether it is at all technically possible to create clones of NODE
376 and store this information in the ipa_node_params structure associated
380 determine_versionability (struct cgraph_node
*node
,
381 class ipa_node_params
*info
)
383 const char *reason
= NULL
;
385 /* There are a number of generic reasons functions cannot be versioned. We
386 also cannot remove parameters if there are type attributes such as fnspec
388 if (node
->alias
|| node
->thunk
)
389 reason
= "alias or thunk";
390 else if (!node
->versionable
)
391 reason
= "not a tree_versionable_function";
392 else if (node
->get_availability () <= AVAIL_INTERPOSABLE
)
393 reason
= "insufficient body availability";
394 else if (!opt_for_fn (node
->decl
, optimize
)
395 || !opt_for_fn (node
->decl
, flag_ipa_cp
))
396 reason
= "non-optimized function";
397 else if (lookup_attribute ("omp declare simd", DECL_ATTRIBUTES (node
->decl
)))
399 /* Ideally we should clone the SIMD clones themselves and create
400 vector copies of them, so IPA-cp and SIMD clones can happily
401 coexist, but that may not be worth the effort. */
402 reason
= "function has SIMD clones";
404 else if (lookup_attribute ("target_clones", DECL_ATTRIBUTES (node
->decl
)))
406 /* Ideally we should clone the target clones themselves and create
407 copies of them, so IPA-cp and target clones can happily
408 coexist, but that may not be worth the effort. */
409 reason
= "function target_clones attribute";
411 /* Don't clone decls local to a comdat group; it breaks and for C++
412 decloned constructors, inlining is always better anyway. */
413 else if (node
->comdat_local_p ())
414 reason
= "comdat-local function";
415 else if (node
->calls_comdat_local
)
417 /* TODO: call is versionable if we make sure that all
418 callers are inside of a comdat group. */
419 reason
= "calls comdat-local function";
422 /* Functions calling BUILT_IN_VA_ARG_PACK and BUILT_IN_VA_ARG_PACK_LEN
423 work only when inlined. Cloning them may still lead to better code
424 because ipa-cp will not give up on cloning further. If the function is
425 external this however leads to wrong code because we may end up producing
426 offline copy of the function. */
427 if (DECL_EXTERNAL (node
->decl
))
428 for (cgraph_edge
*edge
= node
->callees
; !reason
&& edge
;
429 edge
= edge
->next_callee
)
430 if (fndecl_built_in_p (edge
->callee
->decl
, BUILT_IN_NORMAL
))
432 if (DECL_FUNCTION_CODE (edge
->callee
->decl
) == BUILT_IN_VA_ARG_PACK
)
433 reason
= "external function which calls va_arg_pack";
434 if (DECL_FUNCTION_CODE (edge
->callee
->decl
)
435 == BUILT_IN_VA_ARG_PACK_LEN
)
436 reason
= "external function which calls va_arg_pack_len";
439 if (reason
&& dump_file
&& !node
->alias
&& !node
->thunk
)
440 fprintf (dump_file
, "Function %s is not versionable, reason: %s.\n",
441 node
->dump_name (), reason
);
443 info
->versionable
= (reason
== NULL
);
446 /* Return true if it is at all technically possible to create clones of a
450 ipcp_versionable_function_p (struct cgraph_node
*node
)
452 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
453 return info
&& info
->versionable
;
456 /* Structure holding accumulated information about callers of a node. */
458 struct caller_statistics
460 /* If requested (see below), self-recursive call counts are summed into this
462 profile_count rec_count_sum
;
463 /* The sum of all ipa counts of all the other (non-recursive) calls. */
464 profile_count count_sum
;
465 /* Sum of all frequencies for all calls. */
467 /* Number of calls and hot calls respectively. */
468 int n_calls
, n_hot_calls
;
469 /* If itself is set up, also count the number of non-self-recursive
472 /* If non-NULL, this is the node itself and calls from it should have their
473 counts included in rec_count_sum and not count_sum. */
477 /* Initialize fields of STAT to zeroes and optionally set it up so that edges
478 from IGNORED_CALLER are not counted. */
481 init_caller_stats (caller_statistics
*stats
, cgraph_node
*itself
= NULL
)
483 stats
->rec_count_sum
= profile_count::zero ();
484 stats
->count_sum
= profile_count::zero ();
486 stats
->n_hot_calls
= 0;
487 stats
->n_nonrec_calls
= 0;
489 stats
->itself
= itself
;
492 /* Worker callback of cgraph_for_node_and_aliases accumulating statistics of
493 non-thunk incoming edges to NODE. */
496 gather_caller_stats (struct cgraph_node
*node
, void *data
)
498 struct caller_statistics
*stats
= (struct caller_statistics
*) data
;
499 struct cgraph_edge
*cs
;
501 for (cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
502 if (!cs
->caller
->thunk
)
504 ipa_node_params
*info
= ipa_node_params_sum
->get (cs
->caller
);
505 if (info
&& info
->node_dead
)
508 if (cs
->count
.ipa ().initialized_p ())
510 if (stats
->itself
&& stats
->itself
== cs
->caller
)
511 stats
->rec_count_sum
+= cs
->count
.ipa ();
513 stats
->count_sum
+= cs
->count
.ipa ();
515 stats
->freq_sum
+= cs
->sreal_frequency ();
517 if (stats
->itself
&& stats
->itself
!= cs
->caller
)
518 stats
->n_nonrec_calls
++;
520 if (cs
->maybe_hot_p ())
521 stats
->n_hot_calls
++;
527 /* Return true if this NODE is viable candidate for cloning. */
530 ipcp_cloning_candidate_p (struct cgraph_node
*node
)
532 struct caller_statistics stats
;
534 gcc_checking_assert (node
->has_gimple_body_p ());
536 if (!opt_for_fn (node
->decl
, flag_ipa_cp_clone
))
539 fprintf (dump_file
, "Not considering %s for cloning; "
540 "-fipa-cp-clone disabled.\n",
545 if (node
->optimize_for_size_p ())
548 fprintf (dump_file
, "Not considering %s for cloning; "
549 "optimizing it for size.\n",
554 init_caller_stats (&stats
);
555 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
, false);
557 if (ipa_size_summaries
->get (node
)->self_size
< stats
.n_calls
)
560 fprintf (dump_file
, "Considering %s for cloning; code might shrink.\n",
565 /* When profile is available and function is hot, propagate into it even if
566 calls seems cold; constant propagation can improve function's speed
568 if (stats
.count_sum
> profile_count::zero ()
569 && node
->count
.ipa ().initialized_p ())
571 if (stats
.count_sum
> node
->count
.ipa ().apply_scale (90, 100))
574 fprintf (dump_file
, "Considering %s for cloning; "
575 "usually called directly.\n",
580 if (!stats
.n_hot_calls
)
583 fprintf (dump_file
, "Not considering %s for cloning; no hot calls.\n",
588 fprintf (dump_file
, "Considering %s for cloning.\n",
593 template <typename valtype
>
594 class value_topo_info
597 /* Head of the linked list of topologically sorted values. */
598 ipcp_value
<valtype
> *values_topo
;
599 /* Stack for creating SCCs, represented by a linked list too. */
600 ipcp_value
<valtype
> *stack
;
601 /* Counter driving the algorithm in add_val_to_toposort. */
604 value_topo_info () : values_topo (NULL
), stack (NULL
), dfs_counter (0)
606 void add_val (ipcp_value
<valtype
> *cur_val
);
607 void propagate_effects ();
610 /* Arrays representing a topological ordering of call graph nodes and a stack
611 of nodes used during constant propagation and also data required to perform
612 topological sort of values and propagation of benefits in the determined
618 /* Array with obtained topological order of cgraph nodes. */
619 struct cgraph_node
**order
;
620 /* Stack of cgraph nodes used during propagation within SCC until all values
621 in the SCC stabilize. */
622 struct cgraph_node
**stack
;
623 int nnodes
, stack_top
;
625 value_topo_info
<tree
> constants
;
626 value_topo_info
<ipa_polymorphic_call_context
> contexts
;
628 ipa_topo_info () : order(NULL
), stack(NULL
), nnodes(0), stack_top(0),
633 /* Skip edges from and to nodes without ipa_cp enabled.
634 Ignore not available symbols. */
637 ignore_edge_p (cgraph_edge
*e
)
639 enum availability avail
;
640 cgraph_node
*ultimate_target
641 = e
->callee
->function_or_virtual_thunk_symbol (&avail
, e
->caller
);
643 return (avail
<= AVAIL_INTERPOSABLE
644 || !opt_for_fn (ultimate_target
->decl
, optimize
)
645 || !opt_for_fn (ultimate_target
->decl
, flag_ipa_cp
));
648 /* Allocate the arrays in TOPO and topologically sort the nodes into order. */
651 build_toporder_info (class ipa_topo_info
*topo
)
653 topo
->order
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
654 topo
->stack
= XCNEWVEC (struct cgraph_node
*, symtab
->cgraph_count
);
656 gcc_checking_assert (topo
->stack_top
== 0);
657 topo
->nnodes
= ipa_reduced_postorder (topo
->order
, true,
661 /* Free information about strongly connected components and the arrays in
665 free_toporder_info (class ipa_topo_info
*topo
)
667 ipa_free_postorder_info ();
672 /* Add NODE to the stack in TOPO, unless it is already there. */
675 push_node_to_stack (class ipa_topo_info
*topo
, struct cgraph_node
*node
)
677 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
678 if (info
->node_enqueued
)
680 info
->node_enqueued
= 1;
681 topo
->stack
[topo
->stack_top
++] = node
;
684 /* Pop a node from the stack in TOPO and return it or return NULL if the stack
687 static struct cgraph_node
*
688 pop_node_from_stack (class ipa_topo_info
*topo
)
692 struct cgraph_node
*node
;
694 node
= topo
->stack
[topo
->stack_top
];
695 ipa_node_params_sum
->get (node
)->node_enqueued
= 0;
702 /* Set lattice LAT to bottom and return true if it previously was not set as
705 template <typename valtype
>
707 ipcp_lattice
<valtype
>::set_to_bottom ()
714 /* Mark lattice as containing an unknown value and return true if it previously
715 was not marked as such. */
717 template <typename valtype
>
719 ipcp_lattice
<valtype
>::set_contains_variable ()
721 bool ret
= !contains_variable
;
722 contains_variable
= true;
726 /* Set all aggregate lattices in PLATS to bottom and return true if they were
727 not previously set as such. */
730 set_agg_lats_to_bottom (class ipcp_param_lattices
*plats
)
732 bool ret
= !plats
->aggs_bottom
;
733 plats
->aggs_bottom
= true;
737 /* Mark all aggregate lattices in PLATS as containing an unknown value and
738 return true if they were not previously marked as such. */
741 set_agg_lats_contain_variable (class ipcp_param_lattices
*plats
)
743 bool ret
= !plats
->aggs_contain_variable
;
744 plats
->aggs_contain_variable
= true;
749 ipcp_vr_lattice::meet_with (const ipcp_vr_lattice
&other
)
751 return meet_with_1 (other
.m_vr
);
754 /* Meet the current value of the lattice with the range described by
758 ipcp_vr_lattice::meet_with (const vrange
&p_vr
)
760 return meet_with_1 (p_vr
);
763 /* Meet the current value of the lattice with the range described by
764 OTHER_VR. Return TRUE if anything changed. */
767 ipcp_vr_lattice::meet_with_1 (const vrange
&other_vr
)
772 if (other_vr
.varying_p ())
773 return set_to_bottom ();
778 value_range
save (m_vr
);
779 res
= m_vr
.union_ (other_vr
);
780 gcc_assert (res
== (m_vr
!= save
));
783 res
= m_vr
.union_ (other_vr
);
787 /* Return true if value range information in the lattice is yet unknown. */
790 ipcp_vr_lattice::top_p () const
792 return m_vr
.undefined_p ();
795 /* Return true if value range information in the lattice is known to be
799 ipcp_vr_lattice::bottom_p () const
801 return m_vr
.varying_p ();
804 /* Set value range information in the lattice to bottom. Return true if it
805 previously was in a different state. */
808 ipcp_vr_lattice::set_to_bottom ()
810 if (m_vr
.varying_p ())
813 /* Setting an unsupported type here forces the temporary to default
814 to unsupported_range, which can handle VARYING/DEFINED ranges,
815 but nothing else (union, intersect, etc). This allows us to set
816 bottoms on any ranges, and is safe as all users of the lattice
817 check for bottom first. */
818 m_vr
.set_type (void_type_node
);
819 m_vr
.set_varying (void_type_node
);
824 /* Set lattice value to bottom, if it already isn't the case. */
827 ipcp_bits_lattice::set_to_bottom ()
831 m_lattice_val
= IPA_BITS_VARYING
;
837 /* Set to constant if it isn't already. Only meant to be called
838 when switching state from TOP. */
841 ipcp_bits_lattice::set_to_constant (widest_int value
, widest_int mask
)
843 gcc_assert (top_p ());
844 m_lattice_val
= IPA_BITS_CONSTANT
;
845 m_value
= wi::bit_and (wi::bit_not (mask
), value
);
850 /* Return true if any of the known bits are non-zero. */
853 ipcp_bits_lattice::known_nonzero_p () const
857 return wi::ne_p (wi::bit_and (wi::bit_not (m_mask
), m_value
), 0);
860 /* Convert operand to value, mask form. */
863 ipcp_bits_lattice::get_value_and_mask (tree operand
, widest_int
*valuep
, widest_int
*maskp
)
865 wide_int
get_nonzero_bits (const_tree
);
867 if (TREE_CODE (operand
) == INTEGER_CST
)
869 *valuep
= wi::to_widest (operand
);
879 /* Meet operation, similar to ccp_lattice_meet, we xor values
880 if this->value, value have different values at same bit positions, we want
881 to drop that bit to varying. Return true if mask is changed.
882 This function assumes that the lattice value is in CONSTANT state. If
883 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
886 ipcp_bits_lattice::meet_with_1 (widest_int value
, widest_int mask
,
887 unsigned precision
, bool drop_all_ones
)
889 gcc_assert (constant_p ());
891 widest_int old_mask
= m_mask
;
892 m_mask
= (m_mask
| mask
) | (m_value
^ value
);
897 if (wi::sext (m_mask
, precision
) == -1)
898 return set_to_bottom ();
900 return m_mask
!= old_mask
;
903 /* Meet the bits lattice with operand
904 described by <value, mask, sgn, precision. */
907 ipcp_bits_lattice::meet_with (widest_int value
, widest_int mask
,
915 if (wi::sext (mask
, precision
) == -1)
916 return set_to_bottom ();
917 return set_to_constant (value
, mask
);
920 return meet_with_1 (value
, mask
, precision
, false);
923 /* Meet bits lattice with the result of bit_value_binop (other, operand)
924 if code is binary operation or bit_value_unop (other) if code is unary op.
925 In the case when code is nop_expr, no adjustment is required. If
926 DROP_ALL_ONES, mask out any known bits with value one afterwards. */
929 ipcp_bits_lattice::meet_with (ipcp_bits_lattice
& other
, unsigned precision
,
930 signop sgn
, enum tree_code code
, tree operand
,
933 if (other
.bottom_p ())
934 return set_to_bottom ();
936 if (bottom_p () || other
.top_p ())
939 widest_int adjusted_value
, adjusted_mask
;
941 if (TREE_CODE_CLASS (code
) == tcc_binary
)
943 tree type
= TREE_TYPE (operand
);
944 widest_int o_value
, o_mask
;
945 get_value_and_mask (operand
, &o_value
, &o_mask
);
947 bit_value_binop (code
, sgn
, precision
, &adjusted_value
, &adjusted_mask
,
948 sgn
, precision
, other
.get_value (), other
.get_mask (),
949 TYPE_SIGN (type
), TYPE_PRECISION (type
), o_value
, o_mask
);
951 if (wi::sext (adjusted_mask
, precision
) == -1)
952 return set_to_bottom ();
955 else if (TREE_CODE_CLASS (code
) == tcc_unary
)
957 bit_value_unop (code
, sgn
, precision
, &adjusted_value
,
958 &adjusted_mask
, sgn
, precision
, other
.get_value (),
961 if (wi::sext (adjusted_mask
, precision
) == -1)
962 return set_to_bottom ();
966 return set_to_bottom ();
972 adjusted_mask
|= adjusted_value
;
973 adjusted_value
&= ~adjusted_mask
;
975 if (wi::sext (adjusted_mask
, precision
) == -1)
976 return set_to_bottom ();
977 return set_to_constant (adjusted_value
, adjusted_mask
);
980 return meet_with_1 (adjusted_value
, adjusted_mask
, precision
,
984 /* Dump the contents of the list to FILE. */
987 ipa_argagg_value_list::dump (FILE *f
)
990 for (const ipa_argagg_value
&av
: m_elts
)
992 fprintf (f
, "%s %i[%u]=", comma
? "," : "",
993 av
.index
, av
.unit_offset
);
994 print_generic_expr (f
, av
.value
);
996 fprintf (f
, "(by_ref)");
998 fprintf (f
, "(killed)");
1004 /* Dump the contents of the list to stderr. */
1007 ipa_argagg_value_list::debug ()
1012 /* Return the item describing a constant stored for INDEX at UNIT_OFFSET or
1013 NULL if there is no such constant. */
1015 const ipa_argagg_value
*
1016 ipa_argagg_value_list::get_elt (int index
, unsigned unit_offset
) const
1018 ipa_argagg_value key
;
1020 key
.unit_offset
= unit_offset
;
1021 const ipa_argagg_value
*res
1022 = std::lower_bound (m_elts
.begin (), m_elts
.end (), key
,
1023 [] (const ipa_argagg_value
&elt
,
1024 const ipa_argagg_value
&val
)
1026 if (elt
.index
< val
.index
)
1028 if (elt
.index
> val
.index
)
1030 if (elt
.unit_offset
< val
.unit_offset
)
1035 if (res
== m_elts
.end ()
1036 || res
->index
!= index
1037 || res
->unit_offset
!= unit_offset
)
1040 /* TODO: perhaps remove the check (that the underlying array is indeed
1041 sorted) if it turns out it can be too slow? */
1045 const ipa_argagg_value
*slow_res
= NULL
;
1046 int prev_index
= -1;
1047 unsigned prev_unit_offset
= 0;
1048 for (const ipa_argagg_value
&av
: m_elts
)
1050 gcc_assert (prev_index
< 0
1051 || prev_index
< av
.index
1052 || prev_unit_offset
< av
.unit_offset
);
1053 prev_index
= av
.index
;
1054 prev_unit_offset
= av
.unit_offset
;
1055 if (av
.index
== index
1056 && av
.unit_offset
== unit_offset
)
1059 gcc_assert (res
== slow_res
);
1064 /* Return the first item describing a constant stored for parameter with INDEX,
1065 regardless of offset or reference, or NULL if there is no such constant. */
1067 const ipa_argagg_value
*
1068 ipa_argagg_value_list::get_elt_for_index (int index
) const
1070 const ipa_argagg_value
*res
1071 = std::lower_bound (m_elts
.begin (), m_elts
.end (), index
,
1072 [] (const ipa_argagg_value
&elt
, unsigned idx
)
1074 return elt
.index
< idx
;
1076 if (res
== m_elts
.end ()
1077 || res
->index
!= index
)
1082 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, not
1083 performing any check of whether value is passed by reference, or NULL_TREE
1084 if there is no such constant. */
1087 ipa_argagg_value_list::get_value (int index
, unsigned unit_offset
) const
1089 const ipa_argagg_value
*av
= get_elt (index
, unit_offset
);
1090 return av
? av
->value
: NULL_TREE
;
1093 /* Return the aggregate constant stored for INDEX at UNIT_OFFSET, if it is
1094 passed by reference or not according to BY_REF, or NULL_TREE if there is
1095 no such constant. */
1098 ipa_argagg_value_list::get_value (int index
, unsigned unit_offset
,
1101 const ipa_argagg_value
*av
= get_elt (index
, unit_offset
);
1102 if (av
&& av
->by_ref
== by_ref
)
1107 /* Return true if all elements present in OTHER are also present in this
1111 ipa_argagg_value_list::superset_of_p (const ipa_argagg_value_list
&other
) const
1114 for (unsigned i
= 0; i
< other
.m_elts
.size (); i
++)
1116 unsigned other_index
= other
.m_elts
[i
].index
;
1117 unsigned other_offset
= other
.m_elts
[i
].unit_offset
;
1119 while (j
< m_elts
.size ()
1120 && (m_elts
[j
].index
< other_index
1121 || (m_elts
[j
].index
== other_index
1122 && m_elts
[j
].unit_offset
< other_offset
)))
1125 if (j
>= m_elts
.size ()
1126 || m_elts
[j
].index
!= other_index
1127 || m_elts
[j
].unit_offset
!= other_offset
1128 || m_elts
[j
].by_ref
!= other
.m_elts
[i
].by_ref
1130 || !values_equal_for_ipcp_p (m_elts
[j
].value
, other
.m_elts
[i
].value
))
1136 /* Push all items in this list that describe parameter SRC_INDEX into RES as
1137 ones describing DST_INDEX while subtracting UNIT_DELTA from their unit
1138 offsets but skip those which would end up with a negative offset. */
1141 ipa_argagg_value_list::push_adjusted_values (unsigned src_index
,
1142 unsigned dest_index
,
1143 unsigned unit_delta
,
1144 vec
<ipa_argagg_value
> *res
) const
1146 const ipa_argagg_value
*av
= get_elt_for_index (src_index
);
1149 unsigned prev_unit_offset
= 0;
1151 for (; av
< m_elts
.end (); ++av
)
1153 if (av
->index
> src_index
)
1155 if (av
->index
== src_index
1156 && (av
->unit_offset
>= unit_delta
)
1159 ipa_argagg_value new_av
;
1160 gcc_checking_assert (av
->value
);
1161 new_av
.value
= av
->value
;
1162 new_av
.unit_offset
= av
->unit_offset
- unit_delta
;
1163 new_av
.index
= dest_index
;
1164 new_av
.by_ref
= av
->by_ref
;
1165 gcc_assert (!av
->killed
);
1166 new_av
.killed
= false;
1168 /* Quick check that the offsets we push are indeed increasing. */
1170 || new_av
.unit_offset
> prev_unit_offset
);
1171 prev_unit_offset
= new_av
.unit_offset
;
1174 res
->safe_push (new_av
);
1179 /* Push to RES information about single lattices describing aggregate values in
1180 PLATS as those describing parameter DEST_INDEX and the original offset minus
1181 UNIT_DELTA. Return true if any item has been pushed to RES. */
/* NOTE(review): some physical lines were dropped by extraction (original
   numbering skips); code below is unchanged.  */
1184 push_agg_values_from_plats (ipcp_param_lattices
*plats
, int dest_index
,
1185 unsigned unit_delta
,
1186 vec
<ipa_argagg_value
> *res
)
/* An aggregate containing a variable component has no single known values
   to push (early-exit body on a dropped line — TODO confirm).  */
1188 if (plats
->aggs_contain_variable
)
1191 bool pushed_sth
= false;
1193 unsigned prev_unit_offset
= 0;
1194 for (struct ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
/* Only lattices with exactly one known constant and a non-negative
   adjusted offset are pushed.  */
1195 if (aglat
->is_single_const ()
1196 && (aglat
->offset
/ BITS_PER_UNIT
- unit_delta
) >= 0)
1198 ipa_argagg_value iav
;
1199 iav
.value
= aglat
->values
->value
;
1200 iav
.unit_offset
= aglat
->offset
/ BITS_PER_UNIT
- unit_delta
;
1201 iav
.index
= dest_index
;
1202 iav
.by_ref
= plats
->aggs_by_ref
;
/* Sanity check that offsets are pushed in strictly increasing order.  */
1206 || iav
.unit_offset
> prev_unit_offset
);
1207 prev_unit_offset
= iav
.unit_offset
;
1211 res
->safe_push (iav
);
1216 /* Turn all values in LIST that are not present in OTHER into NULL_TREEs.
1217 Return the number of remaining valid entries. */
/* NOTE(review): extraction dropped physical lines here (original numbering
   skips); code below is unchanged.  Both ELTS and OTHER are expected to be
   sorted by (index, unit_offset) — the merge walk below depends on it.  */
1220 intersect_argaggs_with (vec
<ipa_argagg_value
> &elts
,
1221 const vec
<ipa_argagg_value
> &other
)
1223 unsigned valid_entries
= 0;
1225 for (unsigned i
= 0; i
< elts
.length (); i
++)
1230 unsigned this_index
= elts
[i
].index
;
1231 unsigned this_offset
= elts
[i
].unit_offset
;
/* Advance J past all OTHER entries ordered before the current ELTS
   entry.  */
1233 while (j
< other
.length ()
1234 && (other
[j
].index
< this_index
1235 || (other
[j
].index
== this_index
1236 && other
[j
].unit_offset
< this_offset
)))
/* OTHER exhausted: nothing further can match, invalidate the entry.  */
1239 if (j
>= other
.length ())
1241 elts
[i
].value
= NULL_TREE
;
/* Keep the entry only when index, offset, by_ref flag and the value
   itself all agree (else-branch invalidation on dropped lines — TODO
   confirm).  */
1245 if (other
[j
].index
== this_index
1246 && other
[j
].unit_offset
== this_offset
1247 && other
[j
].by_ref
== elts
[i
].by_ref
1249 && values_equal_for_ipcp_p (other
[j
].value
, elts
[i
].value
))
1252 elts
[i
].value
= NULL_TREE
;
1254 return valid_entries
;
1257 /* Mark both aggregate and scalar lattices as containing an unknown variable,
1258 return true if any of them has not been marked as such so far. */
/* NOTE(review): function header/braces were dropped by extraction; code
   below is unchanged.  */
1261 set_all_contains_variable (class ipcp_param_lattices
*plats
)
/* Accumulate whether any of the component lattices actually changed.  */
1264 ret
= plats
->itself
.set_contains_variable ();
1265 ret
|= plats
->ctxlat
.set_contains_variable ();
1266 ret
|= set_agg_lats_contain_variable (plats
);
1267 ret
|= plats
->bits_lattice
.set_to_bottom ();
1268 ret
|= plats
->m_value_range
.set_to_bottom ();
1272 /* Worker of call_for_symbol_thunks_and_aliases, increment the integer DATA
1273 points to by the number of callers to NODE. */
/* NOTE(review): the increment statement itself was dropped by extraction
   (original numbering skips after line 1283); code below is unchanged.  */
1276 count_callers (cgraph_node
*node
, void *data
)
1278 int *caller_count
= (int *) data
;
1280 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
1281 /* Local thunks can be handled transparently, but if the thunk cannot
1282 be optimized out, count it as a real use. */
1283 if (!cs
->caller
->thunk
|| !cs
->caller
->local
)
1288 /* Worker of call_for_symbol_thunks_and_aliases, it is supposed to be called on
1289 the one caller of some other node. Set the caller's corresponding flag. */
/* NOTE(review): extraction dropped some lines (original numbering skips);
   code below is unchanged.  */
1292 set_single_call_flag (cgraph_node
*node
, void *)
1294 cgraph_edge
*cs
= node
->callers
;
1295 /* Local thunks can be handled transparently, skip them. */
1296 while (cs
&& cs
->caller
->thunk
&& cs
->caller
->local
)
1297 cs
= cs
->next_caller
;
/* CS now points to the first non-thunk caller, if any; record on it that
   it makes a single call.  */
1299 if (ipa_node_params
* info
= ipa_node_params_sum
->get (cs
->caller
))
1301 info
->node_calling_single_call
= true;
1307 /* Initialize ipcp_lattices. */
/* NOTE(review): many physical lines of this function were dropped by
   extraction (the original line numbering skips repeatedly); the code below
   is kept byte-identical.  Verify against upstream gcc/ipa-cp.cc.
   Purpose: set up the per-parameter lattices of NODE — either disable them
   (BOTTOM), mark them VARIABLE, or leave them open for propagation —
   depending on visibility, cloneability and surviving parameters.  */
1310 initialize_node_lattices (struct cgraph_node
*node
)
1312 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
1313 struct cgraph_edge
*ie
;
1314 bool disable
= false, variable
= false;
1317 gcc_checking_assert (node
->has_gimple_body_p ());
1319 if (!ipa_get_param_count (info
))
1321 else if (node
->local
)
/* A node local to this compilation unit: count its real (non-thunk)
   callers and remember when there is exactly one.  */
1323 int caller_count
= 0;
1324 node
->call_for_symbol_thunks_and_aliases (count_callers
, &caller_count
,
1326 gcc_checking_assert (caller_count
> 0);
1327 if (caller_count
== 1)
1328 node
->call_for_symbol_thunks_and_aliases (set_single_call_flag
,
1333 /* When cloning is allowed, we can assume that externally visible
1334 functions are not called. We will compensate this by cloning
1336 if (ipcp_versionable_function_p (node
)
1337 && ipcp_cloning_candidate_p (node
))
1343 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1344 && !node
->alias
&& !node
->thunk
)
1346 fprintf (dump_file
, "Initializing lattices of %s\n",
1347 node
->dump_name ());
1348 if (disable
|| variable
)
1349 fprintf (dump_file
, " Marking all lattices as %s\n",
1350 disable
? "BOTTOM" : "VARIABLE");
1353 auto_vec
<bool, 16> surviving_params
;
1354 bool pre_modified
= false;
1356 clone_info
*cinfo
= clone_info::get (node
);
1358 if (!disable
&& cinfo
&& cinfo
->param_adjustments
)
1360 /* At the moment all IPA optimizations should use the number of
1361 parameters of the prevailing decl as the m_always_copy_start.
1362 Handling any other value would complicate the code below, so for the
1363 time being let's only assert it is so. */
1364 gcc_assert ((cinfo
->param_adjustments
->m_always_copy_start
1365 == ipa_get_param_count (info
))
1366 || cinfo
->param_adjustments
->m_always_copy_start
< 0);
1368 pre_modified
= true;
1369 cinfo
->param_adjustments
->get_surviving_params (&surviving_params
);
1371 if (dump_file
&& (dump_flags
& TDF_DETAILS
)
1372 && !node
->alias
&& !node
->thunk
)
1375 for (int j
= 0; j
< ipa_get_param_count (info
); j
++)
1377 if (j
< (int) surviving_params
.length ()
1378 && surviving_params
[j
])
1383 " The following parameters are dead on arrival:");
1386 fprintf (dump_file
, " %u", j
);
1389 fprintf (dump_file
, "\n");
/* Now set the state of each parameter's lattices: parameters with no
   type, or ones that a previous clone removed, go straight to BOTTOM;
   otherwise initialize the value-range lattice and, when VARIABLE was
   requested, mark everything as containing a variable.  */
1393 for (i
= 0; i
< ipa_get_param_count (info
); i
++)
1395 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
1396 tree type
= ipa_get_type (info
, i
);
1398 || !ipa_get_type (info
, i
)
1399 || (pre_modified
&& (surviving_params
.length () <= (unsigned) i
1400 || !surviving_params
[i
])))
1402 plats
->itself
.set_to_bottom ();
1403 plats
->ctxlat
.set_to_bottom ();
1404 set_agg_lats_to_bottom (plats
);
1405 plats
->bits_lattice
.set_to_bottom ();
1406 plats
->m_value_range
.init (type
);
1407 plats
->m_value_range
.set_to_bottom ();
1411 plats
->m_value_range
.init (type
);
1413 set_all_contains_variable (plats
);
/* Mark parameters used as the base of a polymorphic (virtual) call so
   later stages know devirtualization may depend on them.  */
1417 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
1418 if (ie
->indirect_info
->polymorphic
1419 && ie
->indirect_info
->param_index
>= 0)
1421 gcc_checking_assert (ie
->indirect_info
->param_index
>= 0);
1422 ipa_get_parm_lattices (info
,
1423 ie
->indirect_info
->param_index
)->virt_call
= 1;
1427 /* Return true if VALUE can be safely IPA-CP propagated to a parameter of type
/* NOTE(review): parts of this comment and function (return statements,
   braces) were dropped by extraction; code below is unchanged.  */
1431 ipacp_value_safe_for_type (tree param_type
, tree value
)
1433 tree val_type
= TREE_TYPE (value
);
/* Safe when the types match exactly, are uselessly convertible, or the
   value can be folded to PARAM_TYPE.  */
1434 if (param_type
== val_type
1435 || useless_type_conversion_p (param_type
, val_type
)
1436 || fold_convertible_p (param_type
, value
))
1442 /* Return the result of a (possibly arithmetic) operation on the constant
1443 value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is
1444 the type of the parameter to which the result is passed. Return
1445 NULL_TREE if that cannot be determined or be considered an
1446 interprocedural invariant. */
/* NOTE(review): extraction dropped several lines (original numbering
   skips); code below is unchanged.  */
1449 ipa_get_jf_arith_result (enum tree_code opcode
, tree input
, tree operand
,
/* NOP_EXPR is a plain pass-through: no folding needed.  */
1454 if (opcode
== NOP_EXPR
)
1456 if (!is_gimple_ip_invariant (input
))
1459 if (opcode
== ASSERT_EXPR
)
1461 if (values_equal_for_ipcp_p (input
, operand
))
/* When no result type was supplied, derive one: comparisons yield
   boolean, operations keeping the first operand's type yield
   TREE_TYPE (input).  */
1469 if (TREE_CODE_CLASS (opcode
) == tcc_comparison
)
1470 res_type
= boolean_type_node
;
1471 else if (expr_type_first_operand_type_p (opcode
))
1472 res_type
= TREE_TYPE (input
);
1477 if (TREE_CODE_CLASS (opcode
) == tcc_unary
)
1478 res
= fold_unary (opcode
, res_type
, input
);
1480 res
= fold_binary (opcode
, res_type
, input
, operand
);
/* Reject results that are not interprocedural invariants.  */
1482 if (res
&& !is_gimple_ip_invariant (res
))
1488 /* Return the result of a (possibly arithmetic) pass through jump function
1489 JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter
1490 to which the result is passed. Return NULL_TREE if that cannot be
1491 determined or be considered an interprocedural invariant. */
/* Thin wrapper: forwards the jump function's operation and operand to
   ipa_get_jf_arith_result.  (Some argument lines were dropped by
   extraction.)  */
1494 ipa_get_jf_pass_through_result (struct ipa_jump_func
*jfunc
, tree input
,
1497 return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc
),
1499 ipa_get_jf_pass_through_operand (jfunc
),
1503 /* Return the result of an ancestor jump function JFUNC on the constant value
1504 INPUT. Return NULL_TREE if that cannot be determined. */
/* NOTE(review): extraction dropped lines (original numbering skips),
   including the tail of the function; code below is unchanged.  */
1507 ipa_get_jf_ancestor_result (struct ipa_jump_func
*jfunc
, tree input
)
1509 gcc_checking_assert (TREE_CODE (input
) != TREE_BINFO
);
1510 if (TREE_CODE (input
) == ADDR_EXPR
)
1512 gcc_checking_assert (is_gimple_ip_invariant_address (input
));
1513 poly_int64 off
= ipa_get_jf_ancestor_offset (jfunc
);
/* Zero offset: the ancestor is the object itself (return on a dropped
   line — TODO confirm).  */
1514 if (known_eq (off
, 0))
1516 poly_int64 byte_offset
= exact_div (off
, BITS_PER_UNIT
);
/* Build &MEM_REF[input + byte_offset] to represent the ancestor
   address.  */
1517 return build1 (ADDR_EXPR
, TREE_TYPE (input
),
1518 fold_build2 (MEM_REF
, TREE_TYPE (TREE_TYPE (input
)), input
,
1519 build_int_cst (ptr_type_node
, byte_offset
)));
1521 else if (ipa_get_jf_ancestor_keep_null (jfunc
)
1528 /* Determine whether JFUNC evaluates to a single known constant value and if
1529 so, return it. Otherwise return NULL. INFO describes the caller node or
1530 the one it is inlined to, so that pass-through jump functions can be
1531 evaluated. PARM_TYPE is the type of the parameter to which the result is
/* NOTE(review): extraction dropped lines (original numbering skips); code
   below is unchanged.  */
1535 ipa_value_from_jfunc (class ipa_node_params
*info
, struct ipa_jump_func
*jfunc
,
1538 if (jfunc
->type
== IPA_JF_CONST
)
1539 return ipa_get_jf_constant (jfunc
);
1540 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
1541 || jfunc
->type
== IPA_JF_ANCESTOR
)
/* Find the formal parameter index the jump function reads from.  */
1546 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1547 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1549 idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
/* In an already-specialized clone the known constants are recorded
   directly; otherwise consult the scalar lattice.  */
1551 if (info
->ipcp_orig_node
)
1552 input
= info
->known_csts
[idx
];
1555 ipcp_lattice
<tree
> *lat
;
1557 if (info
->lattices
.is_empty ()
1558 || idx
>= ipa_get_param_count (info
))
1560 lat
= ipa_get_scalar_lat (info
, idx
);
1561 if (!lat
->is_single_const ())
1563 input
= lat
->values
->value
;
/* Apply the jump function's transformation to the single known input.  */
1569 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
1570 return ipa_get_jf_pass_through_result (jfunc
, input
, parm_type
);
1572 return ipa_get_jf_ancestor_result (jfunc
, input
);
1578 /* Determine whether JFUNC evaluates to single known polymorphic context, given
1579 that INFO describes the caller node or the one it is inlined to, CS is the
1580 call graph edge corresponding to JFUNC and CSIDX index of the described
/* NOTE(review): extraction dropped many lines here (original numbering
   skips, e.g. several returns and braces); code below is unchanged.
   Verify against upstream gcc/ipa-cp.cc.  */
1583 ipa_polymorphic_call_context
1584 ipa_context_from_jfunc (ipa_node_params
*info
, cgraph_edge
*cs
, int csidx
,
1585 ipa_jump_func
*jfunc
)
1587 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
1588 ipa_polymorphic_call_context ctx
;
1589 ipa_polymorphic_call_context
*edge_ctx
1590 = cs
? ipa_get_ith_polymorhic_call_context (args
, csidx
) : NULL
;
/* A useful context recorded directly on the edge takes part in the
   final combination.  */
1592 if (edge_ctx
&& !edge_ctx
->useless_p ())
1595 if (jfunc
->type
== IPA_JF_PASS_THROUGH
1596 || jfunc
->type
== IPA_JF_ANCESTOR
)
1598 ipa_polymorphic_call_context srcctx
;
1600 bool type_preserved
= true;
1601 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
/* Non-NOP pass-throughs mangle the value; the context cannot be
   tracked through them (bail-out on a dropped line — TODO confirm).  */
1603 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
1605 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
1606 srcidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1610 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
1611 srcidx
= ipa_get_jf_ancestor_formal_id (jfunc
);
/* Obtain the source context: from the clone's known contexts, or from
   the single-constant context lattice.  */
1613 if (info
->ipcp_orig_node
)
1615 if (info
->known_contexts
.exists ())
1616 srcctx
= info
->known_contexts
[srcidx
];
1620 if (info
->lattices
.is_empty ()
1621 || srcidx
>= ipa_get_param_count (info
))
1623 ipcp_lattice
<ipa_polymorphic_call_context
> *lat
;
1624 lat
= ipa_get_poly_ctx_lat (info
, srcidx
);
1625 if (!lat
->is_single_const ())
1627 srcctx
= lat
->values
->value
;
1629 if (srcctx
.useless_p ())
/* Adjust the source context by the jump function's effects before
   combining it with the edge context.  */
1631 if (jfunc
->type
== IPA_JF_ANCESTOR
)
1632 srcctx
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
1633 if (!type_preserved
)
1634 srcctx
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
1635 srcctx
.combine_with (ctx
);
1642 /* Emulate effects of unary OPERATION and/or conversion from SRC_TYPE to
1643 DST_TYPE on value range in SRC_VR and store it to DST_VR. Return true if
1644 the result is a range that is not VARYING nor UNDEFINED. */
/* NOTE(review): extraction dropped a few lines (original numbering skips);
   code below is unchanged.  */
1647 ipa_vr_operation_and_type_effects (vrange
&dst_vr
,
1648 const vrange
&src_vr
,
1649 enum tree_code operation
,
1650 tree dst_type
, tree src_type
)
/* Both endpoint types must be supported by the ipa-vr machinery.  */
1652 if (!ipa_vr_supported_type_p (dst_type
)
1653 || !ipa_vr_supported_type_p (src_type
))
1656 range_op_handler
handler (operation
);
/* VARYING of DST_TYPE serves as the neutral second operand for
   fold_range of a unary operation.  */
1660 value_range
varying (dst_type
);
1661 varying
.set_varying (dst_type
);
1663 return (handler
.operand_check_p (dst_type
, src_type
, dst_type
)
1664 && handler
.fold_range (dst_vr
, dst_type
, src_vr
, varying
)
1665 && !dst_vr
.varying_p ()
1666 && !dst_vr
.undefined_p ());
1669 /* Same as above, but the SRC_VR argument is an IPA_VR which must
1670 first be extracted onto a vrange. */
/* NOTE(review): the declaration of TMP was dropped by extraction; code
   below is unchanged.  */
1673 ipa_vr_operation_and_type_effects (vrange
&dst_vr
,
1674 const ipa_vr
&src_vr
,
1675 enum tree_code operation
,
1676 tree dst_type
, tree src_type
)
1679 src_vr
.get_vrange (tmp
);
1680 return ipa_vr_operation_and_type_effects (dst_vr
, tmp
, operation
,
1681 dst_type
, src_type
);
1684 /* Determine range of JFUNC given that INFO describes the caller node or
1685 the one it is inlined to, CS is the call graph edge corresponding to JFUNC
1686 and PARM_TYPE of the parameter. */
/* NOTE(review): extraction dropped numerous lines of this function
   (original numbering skips, e.g. several conditions and returns); code
   below is unchanged.  Verify against upstream gcc/ipa-cp.cc.  */
1689 ipa_value_range_from_jfunc (vrange
&vr
,
1690 ipa_node_params
*info
, cgraph_edge
*cs
,
1691 ipa_jump_func
*jfunc
, tree parm_type
)
1693 vr
.set_undefined ();
/* First use the range recorded on the jump function itself, converted
   to PARM_TYPE.  */
1696 ipa_vr_operation_and_type_effects (vr
,
1698 NOP_EXPR
, parm_type
,
1699 jfunc
->m_vr
->type ());
1700 if (vr
.singleton_p ())
1702 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
/* For pass-through functions, refine using the caller's recorded
   per-parameter range from the ipcp transformation summary.  */
1705 ipcp_transformation
*sum
1706 = ipcp_get_transformation_summary (cs
->caller
->inlined_to
1707 ? cs
->caller
->inlined_to
1709 if (!sum
|| !sum
->m_vr
)
1712 idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
1714 if (!(*sum
->m_vr
)[idx
].known_p ())
1716 tree vr_type
= ipa_get_type (info
, idx
);
1718 (*sum
->m_vr
)[idx
].get_vrange (srcvr
);
1720 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
1722 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
1724 value_range
res (parm_type
);
1726 if (ipa_vr_operation_and_type_effects (res
,
1728 operation
, parm_type
,
/* Binary operation: fold the source range with the constant operand's
   range, then convert the result to PARM_TYPE.  */
1734 value_range
op_res (vr_type
);
1735 value_range
res (vr_type
);
1736 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
1737 value_range
op_vr (TREE_TYPE (op
));
1738 range_op_handler
handler (operation
);
1740 ipa_range_set_and_normalize (op_vr
, op
);
1743 || !op_res
.supports_type_p (vr_type
)
1744 /* Sometimes we try to fold comparison operators using a
1745 pointer type to hold the result instead of a boolean
1746 type. Avoid trapping in the sanity check in
1747 fold_range until this is fixed. */
1748 || srcvr
.undefined_p ()
1749 || op_vr
.undefined_p ()
1750 || !handler
.operand_check_p (vr_type
, srcvr
.type (), op_vr
.type ())
1751 || !handler
.fold_range (op_res
, vr_type
, srcvr
, op_vr
))
1752 op_res
.set_varying (vr_type
);
1754 if (ipa_vr_operation_and_type_effects (res
,
1756 NOP_EXPR
, parm_type
,
1763 /* Determine whether ITEM, jump function for an aggregate part, evaluates to a
1764 single known constant value and if so, return it. Otherwise return NULL.
1765 NODE and INFO describes the caller node or the one it is inlined to, and
1766 its related info. */
/* NOTE(review): extraction dropped many lines of this function (original
   numbering skips, e.g. returns, breaks and braces); code below is
   unchanged.  Verify against upstream gcc/ipa-cp.cc.  */
1769 ipa_agg_value_from_jfunc (ipa_node_params
*info
, cgraph_node
*node
,
1770 const ipa_agg_jf_item
*item
)
1772 tree value
= NULL_TREE
;
/* Reject malformed items: negative offsets, unknown jump functions, or
   offsets too large to express as a unit offset.  */
1775 if (item
->offset
< 0
1776 || item
->jftype
== IPA_JF_UNKNOWN
1777 || item
->offset
>= (HOST_WIDE_INT
) UINT_MAX
* BITS_PER_UNIT
)
1780 if (item
->jftype
== IPA_JF_CONST
)
1781 return item
->value
.constant
;
1783 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
1784 || item
->jftype
== IPA_JF_LOAD_AGG
)
;
1786 src_idx
= item
->value
.pass_through
.formal_id
;
/* Look the source value up: in a specialized clone from the recorded
   constants / aggregate values, otherwise from the lattices.  */
1788 if (info
->ipcp_orig_node
)
1790 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
1791 value
= info
->known_csts
[src_idx
];
1792 else if (ipcp_transformation
*ts
= ipcp_get_transformation_summary (node
))
1794 ipa_argagg_value_list
avl (ts
);
1795 value
= avl
.get_value (src_idx
,
1796 item
->value
.load_agg
.offset
/ BITS_PER_UNIT
,
1797 item
->value
.load_agg
.by_ref
);
1800 else if (!info
->lattices
.is_empty ())
1802 class ipcp_param_lattices
*src_plats
1803 = ipa_get_parm_lattices (info
, src_idx
);
1805 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
1807 struct ipcp_lattice
<tree
> *lat
= &src_plats
->itself
;
1809 if (!lat
->is_single_const ())
1812 value
= lat
->values
->value
;
1814 else if (src_plats
->aggs
1815 && !src_plats
->aggs_bottom
1816 && !src_plats
->aggs_contain_variable
1817 && src_plats
->aggs_by_ref
== item
->value
.load_agg
.by_ref
)
/* Scan the sorted aggregate lattices for one at exactly the load's
   offset.  */
1819 struct ipcp_agg_lattice
*aglat
;
1821 for (aglat
= src_plats
->aggs
; aglat
; aglat
= aglat
->next
)
1823 if (aglat
->offset
> item
->value
.load_agg
.offset
)
1826 if (aglat
->offset
== item
->value
.load_agg
.offset
)
1828 if (aglat
->is_single_const ())
1829 value
= aglat
->values
->value
;
1839 if (item
->jftype
== IPA_JF_LOAD_AGG
)
1841 tree load_type
= item
->value
.load_agg
.type
;
1842 tree value_type
= TREE_TYPE (value
);
1844 /* Ensure value type is compatible with load type. */
1845 if (!useless_type_conversion_p (load_type
, value_type
))
/* Finally apply the item's arithmetic operation to the source value.  */
1849 return ipa_get_jf_arith_result (item
->value
.pass_through
.operation
,
1851 item
->value
.pass_through
.operand
,
1855 /* Process all items in AGG_JFUNC relative to caller (or the node the original
1856 caller is inlined to) NODE which described by INFO and push the results to
1857 RES as describing values passed in parameter DST_INDEX. */
/* NOTE(review): extraction dropped some lines (original numbering skips);
   code below is unchanged.  */
1860 ipa_push_agg_values_from_jfunc (ipa_node_params
*info
, cgraph_node
*node
,
1861 ipa_agg_jump_function
*agg_jfunc
,
1863 vec
<ipa_argagg_value
> *res
)
1865 unsigned prev_unit_offset
= 0;
1868 for (const ipa_agg_jf_item
&item
: agg_jfunc
->items
)
/* Evaluate each item to a constant; items without one are skipped
   (continue on a dropped line — TODO confirm).  */
1870 tree value
= ipa_agg_value_from_jfunc (info
, node
, &item
);
1874 ipa_argagg_value iav
;
1876 iav
.unit_offset
= item
.offset
/ BITS_PER_UNIT
;
1877 iav
.index
= dst_index
;
1878 iav
.by_ref
= agg_jfunc
->by_ref
;
/* Sanity check that the pushed offsets are strictly increasing.  */
1882 || iav
.unit_offset
> prev_unit_offset
);
1883 prev_unit_offset
= iav
.unit_offset
;
1886 res
->safe_push (iav
);
1890 /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not
1891 bottom, not containing a variable component and without any known value at
/* NOTE(review): the tail of this comment and several statements were
   dropped by extraction; code below is unchanged.  */
1895 ipcp_verify_propagated_values (void)
1897 struct cgraph_node
*node
;
1899 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
1901 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
/* Skip functions for which IPA-CP or optimization is disabled.  */
1902 if (!opt_for_fn (node
->decl
, flag_ipa_cp
)
1903 || !opt_for_fn (node
->decl
, optimize
))
1905 int i
, count
= ipa_get_param_count (info
);
1907 for (i
= 0; i
< count
; i
++)
1909 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (info
, i
);
/* A lattice that is neither bottom nor variable yet has no values is
   still TOP — propagation must never leave it like that.  */
1912 && !lat
->contains_variable
1913 && lat
->values_count
== 0)
1917 symtab
->dump (dump_file
);
1918 fprintf (dump_file
, "\nIPA lattices after constant "
1919 "propagation, before gcc_unreachable:\n");
1920 print_all_lattices (dump_file
, true, false);
1929 /* Return true iff X and Y should be considered equal contexts by IPA-CP. */
/* Overload for polymorphic call contexts; delegates to the context's own
   equality.  (Surrounding braces were dropped by extraction.)  */
1932 values_equal_for_ipcp_p (ipa_polymorphic_call_context x
,
1933 ipa_polymorphic_call_context y
)
1935 return x
.equal_to (y
);
1939 /* Add a new value source to the value represented by THIS, marking that a
1940 value comes from edge CS and (if the underlying jump function is a
1941 pass-through or an ancestor one) from a caller value SRC_VAL of a caller
1942 parameter described by SRC_INDEX. OFFSET is negative if the source was the
1943 scalar value of the parameter itself or the offset within an aggregate. */
/* NOTE(review): extraction dropped several lines (original numbering
   skips); code below is unchanged.  */
1945 template <typename valtype
>
1947 ipcp_value
<valtype
>::add_source (cgraph_edge
*cs
, ipcp_value
*src_val
,
1948 int src_idx
, HOST_WIDE_INT offset
)
1950 ipcp_value_source
<valtype
> *src
;
/* Sources are pool-allocated and prepended to the singly-linked
   sources list.  */
1952 src
= new (ipcp_sources_pool
.allocate ()) ipcp_value_source
<valtype
>;
1953 src
->offset
= offset
;
1956 src
->index
= src_idx
;
1958 src
->next
= sources
;
1962 /* Allocate a new ipcp_value holding a tree constant, initialize its value to
1963 SOURCE and clear all other fields. */
/* NOTE(review): the assignment of CST and the return were dropped by
   extraction; code below is unchanged.  */
1965 static ipcp_value
<tree
> *
1966 allocate_and_init_ipcp_value (tree cst
, unsigned same_lat_gen_level
)
1968 ipcp_value
<tree
> *val
;
1970 val
= new (ipcp_cst_values_pool
.allocate ()) ipcp_value
<tree
>();
1972 val
->self_recursion_generated_level
= same_lat_gen_level
;
1976 /* Allocate a new ipcp_value holding a polymorphic context, initialize its
1977 value to SOURCE and clear all other fields. */
/* NOTE(review): the assignment of CTX and the return were dropped by
   extraction; code below is unchanged.  */
1979 static ipcp_value
<ipa_polymorphic_call_context
> *
1980 allocate_and_init_ipcp_value (ipa_polymorphic_call_context ctx
,
1981 unsigned same_lat_gen_level
)
1983 ipcp_value
<ipa_polymorphic_call_context
> *val
;
1985 val
= new (ipcp_poly_ctx_values_pool
.allocate ())
1986 ipcp_value
<ipa_polymorphic_call_context
>();
1988 val
->self_recursion_generated_level
= same_lat_gen_level
;
1992 /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. CS,
1993 SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same
1994 meaning. OFFSET -1 means the source is scalar and not a part of an
1995 aggregate. If non-NULL, VAL_P records address of existing or newly added
1998 If the value is generated for a self-recursive call as a result of an
1999 arithmetic pass-through jump-function acting on a value in the same lattice,
2000 SAME_LAT_GEN_LEVEL must be the length of such chain, otherwise it must be
2001 zero. If it is non-zero, PARAM_IPA_CP_VALUE_LIST_SIZE limit is ignored. */
/* NOTE(review): extraction dropped numerous lines of this function
   (original numbering skips, e.g. several returns and braces); code below
   is unchanged.  Verify against upstream gcc/ipa-cp.cc.  */
2003 template <typename valtype
>
2005 ipcp_lattice
<valtype
>::add_value (valtype newval
, cgraph_edge
*cs
,
2006 ipcp_value
<valtype
> *src_val
,
2007 int src_idx
, HOST_WIDE_INT offset
,
2008 ipcp_value
<valtype
> **val_p
,
2009 unsigned same_lat_gen_level
)
2011 ipcp_value
<valtype
> *val
, *last_val
= NULL
;
/* First check whether NEWVAL is already present; if so, only its
   metadata and sources need updating.  */
2019 for (val
= values
; val
; last_val
= val
, val
= val
->next
)
2020 if (values_equal_for_ipcp_p (val
->value
, newval
))
2025 if (val
->self_recursion_generated_level
< same_lat_gen_level
)
2026 val
->self_recursion_generated_level
= same_lat_gen_level
;
/* Within an SCC, avoid duplicating an already-recorded source.  */
2028 if (ipa_edge_within_scc (cs
))
2030 ipcp_value_source
<valtype
> *s
;
2031 for (s
= val
->sources
; s
; s
= s
->next
)
2032 if (s
->cs
== cs
&& s
->val
== src_val
)
2038 val
->add_source (cs
, src_val
, src_idx
, offset
);
/* Too many values: degrade the whole lattice to bottom rather than
   keep growing the list (unless the self-recursion override is on).  */
2042 if (!same_lat_gen_level
&& values_count
>= opt_for_fn (cs
->callee
->decl
,
2043 param_ipa_cp_value_list_size
))
2045 /* We can only free sources, not the values themselves, because sources
2046 of other values in this SCC might point to them. */
2047 for (val
= values
; val
; val
= val
->next
)
2049 while (val
->sources
)
2051 ipcp_value_source
<valtype
> *src
= val
->sources
;
2052 val
->sources
= src
->next
;
2053 ipcp_sources_pool
.remove ((ipcp_value_source
<tree
>*)src
);
2057 return set_to_bottom ();
/* Genuinely new value: allocate it and attach its first source.  */
2061 val
= allocate_and_init_ipcp_value (newval
, same_lat_gen_level
);
2062 val
->add_source (cs
, src_val
, src_idx
, offset
);
2065 /* Add the new value to end of value list, which can reduce iterations
2066 of propagation stage for recursive function. */
2068 last_val
->next
= val
;
2078 /* A helper function that returns result of operation specified by OPCODE on
2079 the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the
2080 value of SRC_VAL. If the operation is binary, OPND2 is a constant value
2081 acting as its second operand. If non-NULL, RES_TYPE is expected type of
/* NOTE(review): extraction dropped parameter lines and a return (original
   numbering skips); code below is unchanged.  */
2085 get_val_across_arith_op (enum tree_code opcode
,
2088 ipcp_value
<tree
> *src_val
,
2091 tree opnd1
= src_val
->value
;
2093 /* Skip source values that is incompatible with specified type. */
2095 && !useless_type_conversion_p (opnd1_type
, TREE_TYPE (opnd1
)))
2098 return ipa_get_jf_arith_result (opcode
, opnd1
, opnd2
, res_type
);
2101 /* Propagate values through an arithmetic transformation described by a jump
2102 function associated with edge CS, taking values from SRC_LAT and putting
2103 them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT.
2104 OPND2 is a constant value if transformation is a binary operation.
2105 SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of
2106 a part of the aggregate. SRC_IDX is the index of the source parameter.
2107 RES_TYPE is the value type of result being propagated into. Return true if
2108 DEST_LAT changed. */
/* NOTE(review): extraction dropped many lines of this function (original
   numbering skips); code below is unchanged.  Verify against upstream
   gcc/ipa-cp.cc.  */
2111 propagate_vals_across_arith_jfunc (cgraph_edge
*cs
,
2112 enum tree_code opcode
,
2115 ipcp_lattice
<tree
> *src_lat
,
2116 ipcp_lattice
<tree
> *dest_lat
,
2117 HOST_WIDE_INT src_offset
,
2121 ipcp_value
<tree
> *src_val
;
2124 /* Due to circular dependencies, propagating within an SCC through arithmetic
2125 transformation would create infinite number of values. But for
2126 self-feeding recursive function, we could allow propagation in a limited
2127 count, and this can enable a simple kind of recursive function versioning.
2128 For other scenario, we would just make lattices bottom. */
2129 if (opcode
!= NOP_EXPR
&& ipa_edge_within_scc (cs
))
2133 int max_recursive_depth
= opt_for_fn(cs
->caller
->decl
,
2134 param_ipa_cp_max_recursive_depth
);
2135 if (src_lat
!= dest_lat
|| max_recursive_depth
< 1)
2136 return dest_lat
->set_contains_variable ();
2138 /* No benefit if recursive execution is in low probability. */
2139 if (cs
->sreal_frequency () * 100
2140 <= ((sreal
) 1) * opt_for_fn (cs
->caller
->decl
,
2141 param_ipa_cp_min_recursive_probability
))
2142 return dest_lat
->set_contains_variable ();
2144 auto_vec
<ipcp_value
<tree
> *, 8> val_seeds
;
2146 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2148 /* Now we do not use self-recursively generated value as propagation
2149 source, this is absolutely conservative, but could avoid explosion
2150 of lattice's value space, especially when one recursive function
2151 calls another recursive. */
2152 if (src_val
->self_recursion_generated_p ())
2154 ipcp_value_source
<tree
> *s
;
2156 /* If the lattice has already been propagated for the call site,
2157 no need to do that again. */
2158 for (s
= src_val
->sources
; s
; s
= s
->next
)
2160 return dest_lat
->set_contains_variable ();
2163 val_seeds
.safe_push (src_val
);
2166 gcc_assert ((int) val_seeds
.length () <= param_ipa_cp_value_list_size
);
2168 /* Recursively generate lattice values with a limited count. */
2169 FOR_EACH_VEC_ELT (val_seeds
, i
, src_val
)
2171 for (int j
= 1; j
< max_recursive_depth
; j
++)
2173 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2176 || !ipacp_value_safe_for_type (res_type
, cstval
))
/* Chain each generated value onto the previous one (note the J passed
   as the self-recursion generation level).  */
2179 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2180 src_offset
, &src_val
, j
);
2181 gcc_checking_assert (src_val
);
2184 ret
|= dest_lat
->set_contains_variable ();
/* Non-SCC (or NOP) case: propagate each source value once.  */
2187 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2189 /* Now we do not use self-recursively generated value as propagation
2190 source, otherwise it is easy to make value space of normal lattice
2192 if (src_val
->self_recursion_generated_p ())
2194 ret
|= dest_lat
->set_contains_variable ();
2198 tree cstval
= get_val_across_arith_op (opcode
, opnd1_type
, opnd2
,
2201 && ipacp_value_safe_for_type (res_type
, cstval
))
2202 ret
|= dest_lat
->add_value (cstval
, cs
, src_val
, src_idx
,
2205 ret
|= dest_lat
->set_contains_variable ();
2211 /* Propagate values through a pass-through jump function JFUNC associated with
2212 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2213 is the index of the source parameter. PARM_TYPE is the type of the
2214 parameter to which the result is passed. */
/* Thin wrapper around propagate_vals_across_arith_jfunc; -1 means the
   source is the scalar parameter, not an aggregate part.  (Some argument
   lines were dropped by extraction.)  */
2217 propagate_vals_across_pass_through (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2218 ipcp_lattice
<tree
> *src_lat
,
2219 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2222 return propagate_vals_across_arith_jfunc (cs
,
2223 ipa_get_jf_pass_through_operation (jfunc
),
2225 ipa_get_jf_pass_through_operand (jfunc
),
2226 src_lat
, dest_lat
, -1, src_idx
, parm_type
);
2229 /* Propagate values through an ancestor jump function JFUNC associated with
2230 edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX
2231 is the index of the source parameter. */
/* NOTE(review): extraction dropped some lines (original numbering skips);
   code below is unchanged.  */
2234 propagate_vals_across_ancestor (struct cgraph_edge
*cs
,
2235 struct ipa_jump_func
*jfunc
,
2236 ipcp_lattice
<tree
> *src_lat
,
2237 ipcp_lattice
<tree
> *dest_lat
, int src_idx
,
2240 ipcp_value
<tree
> *src_val
;
/* Ancestor functions within an SCC would generate unbounded offsets;
   be conservative.  */
2243 if (ipa_edge_within_scc (cs
))
2244 return dest_lat
->set_contains_variable ();
2246 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2248 tree t
= ipa_get_jf_ancestor_result (jfunc
, src_val
->value
);
2250 if (t
&& ipacp_value_safe_for_type (param_type
, t
))
2251 ret
|= dest_lat
->add_value (t
, cs
, src_val
, src_idx
);
2253 ret
|= dest_lat
->set_contains_variable ();
2259 /* Propagate scalar values across jump function JFUNC that is associated with
2260 edge CS and put the values into DEST_LAT. PARM_TYPE is the type of the
2261 parameter to which the result is passed. */
/* NOTE(review): extraction dropped several lines (original numbering
   skips, e.g. returns and braces); code below is unchanged.  */
2264 propagate_scalar_across_jump_function (struct cgraph_edge
*cs
,
2265 struct ipa_jump_func
*jfunc
,
2266 ipcp_lattice
<tree
> *dest_lat
,
/* A bottom destination can absorb nothing further.  */
2269 if (dest_lat
->bottom
)
2272 if (jfunc
->type
== IPA_JF_CONST
)
2274 tree val
= ipa_get_jf_constant (jfunc
);
2275 if (ipacp_value_safe_for_type (param_type
, val
))
2276 return dest_lat
->add_value (val
, cs
, NULL
, 0);
2278 return dest_lat
->set_contains_variable ();
2280 else if (jfunc
->type
== IPA_JF_PASS_THROUGH
2281 || jfunc
->type
== IPA_JF_ANCESTOR
)
2283 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2284 ipcp_lattice
<tree
> *src_lat
;
2288 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2289 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2291 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2293 src_lat
= ipa_get_scalar_lat (caller_info
, src_idx
);
2294 if (src_lat
->bottom
)
2295 return dest_lat
->set_contains_variable ();
2297 /* If we would need to clone the caller and cannot, do not propagate. */
2298 if (!ipcp_versionable_function_p (cs
->caller
)
2299 && (src_lat
->contains_variable
2300 || (src_lat
->values_count
> 1)))
2301 return dest_lat
->set_contains_variable ();
2303 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2304 ret
= propagate_vals_across_pass_through (cs
, jfunc
, src_lat
,
2308 ret
= propagate_vals_across_ancestor (cs
, jfunc
, src_lat
, dest_lat
,
2309 src_idx
, param_type
);
/* Variability in the source carries over to the destination.  */
2311 if (src_lat
->contains_variable
)
2312 ret
|= dest_lat
->set_contains_variable ();
2317 /* TODO: We currently do not handle member method pointers in IPA-CP (we only
2318 use it for indirect inlining), we should propagate them too. */
2319 return dest_lat
->set_contains_variable ();
2322 /* Propagate scalar values across jump function JFUNC that is associated with
2323 edge CS and describes argument IDX and put the values into DEST_LAT. */
/* NOTE(review): this variant handles polymorphic call contexts; extraction
   dropped numerous lines (original numbering skips); code below is
   unchanged.  Verify against upstream gcc/ipa-cp.cc.  */
2326 propagate_context_across_jump_function (cgraph_edge
*cs
,
2327 ipa_jump_func
*jfunc
, int idx
,
2328 ipcp_lattice
<ipa_polymorphic_call_context
> *dest_lat
)
2330 if (dest_lat
->bottom
)
2332 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
2334 bool added_sth
= false;
2335 bool type_preserved
= true;
2337 ipa_polymorphic_call_context edge_ctx
, *edge_ctx_ptr
2338 = ipa_get_ith_polymorhic_call_context (args
, idx
);
2341 edge_ctx
= *edge_ctx_ptr
;
2343 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2344 || jfunc
->type
== IPA_JF_ANCESTOR
)
2346 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2348 ipcp_lattice
<ipa_polymorphic_call_context
> *src_lat
;
2350 /* TODO: Once we figure out how to propagate speculations, it will
2351 probably be a good idea to switch to speculation if type_preserved is
2352 not set instead of punting. */
2353 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
/* Non-NOP pass-throughs cannot carry a context (bail-out on a dropped
   line — TODO confirm).  */
2355 if (ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
)
2357 type_preserved
= ipa_get_jf_pass_through_type_preserved (jfunc
);
2358 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2362 type_preserved
= ipa_get_jf_ancestor_type_preserved (jfunc
);
2363 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2366 src_lat
= ipa_get_poly_ctx_lat (caller_info
, src_idx
);
2367 /* If we would need to clone the caller and cannot, do not propagate. */
2368 if (!ipcp_versionable_function_p (cs
->caller
)
2369 && (src_lat
->contains_variable
2370 || (src_lat
->values_count
> 1)))
2373 ipcp_value
<ipa_polymorphic_call_context
> *src_val
;
2374 for (src_val
= src_lat
->values
; src_val
; src_val
= src_val
->next
)
2376 ipa_polymorphic_call_context cur
= src_val
->value
;
2378 if (!type_preserved
)
2379 cur
.possible_dynamic_type_change (cs
->in_polymorphic_cdtor
);
2380 if (jfunc
->type
== IPA_JF_ANCESTOR
)
2381 cur
.offset_by (ipa_get_jf_ancestor_offset (jfunc
));
2382 /* TODO: In cases we know how the context is going to be used,
2383 we can improve the result by passing proper OTR_TYPE. */
2384 cur
.combine_with (edge_ctx
);
2385 if (!cur
.useless_p ())
/* A variable source that differs from the edge context also makes the
   destination variable.  */
2387 if (src_lat
->contains_variable
2388 && !edge_ctx
.equal_to (cur
))
2389 ret
|= dest_lat
->set_contains_variable ();
2390 ret
|= dest_lat
->add_value (cur
, cs
, src_val
, src_idx
);
/* Fallback: when nothing was propagated from a source lattice, use the
   edge's own context if useful, else mark variable.  */
2399 if (!edge_ctx
.useless_p ())
2400 ret
|= dest_lat
->add_value (edge_ctx
, cs
);
2402 ret
|= dest_lat
->set_contains_variable ();
2408 /* Propagate bits across jfunc that is associated with
2409 edge cs and update dest_lattice accordingly. */
2412 propagate_bits_across_jump_function (cgraph_edge
*cs
, int idx
,
2413 ipa_jump_func
*jfunc
,
2414 ipcp_bits_lattice
*dest_lattice
)
2416 if (dest_lattice
->bottom_p ())
2419 enum availability availability
;
2420 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
2421 ipa_node_params
*callee_info
= ipa_node_params_sum
->get (callee
);
2422 tree parm_type
= ipa_get_type (callee_info
, idx
);
2424 /* For K&R C programs, ipa_get_type() could return NULL_TREE. Avoid the
2425 transform for these cases. Similarly, we can have bad type mismatches
2426 with LTO, avoid doing anything with those too. */
2428 || (!INTEGRAL_TYPE_P (parm_type
) && !POINTER_TYPE_P (parm_type
)))
2430 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2431 fprintf (dump_file
, "Setting dest_lattice to bottom, because type of "
2432 "param %i of %s is NULL or unsuitable for bits propagation\n",
2433 idx
, cs
->callee
->dump_name ());
2435 return dest_lattice
->set_to_bottom ();
2438 unsigned precision
= TYPE_PRECISION (parm_type
);
2439 signop sgn
= TYPE_SIGN (parm_type
);
2441 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2442 || jfunc
->type
== IPA_JF_ANCESTOR
)
2444 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2445 tree operand
= NULL_TREE
;
2446 enum tree_code code
;
2448 bool keep_null
= false;
2450 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2452 code
= ipa_get_jf_pass_through_operation (jfunc
);
2453 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2454 if (code
!= NOP_EXPR
)
2455 operand
= ipa_get_jf_pass_through_operand (jfunc
);
2459 code
= POINTER_PLUS_EXPR
;
2460 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2461 unsigned HOST_WIDE_INT offset
2462 = ipa_get_jf_ancestor_offset (jfunc
) / BITS_PER_UNIT
;
2463 keep_null
= (ipa_get_jf_ancestor_keep_null (jfunc
) || !offset
);
2464 operand
= build_int_cstu (size_type_node
, offset
);
2467 class ipcp_param_lattices
*src_lats
2468 = ipa_get_parm_lattices (caller_info
, src_idx
);
2470 /* Try to propagate bits if src_lattice is bottom, but jfunc is known.
2476 Assume lattice for x is bottom, however we can still propagate
2477 result of x & 0xff == 0xff, which gets computed during ccp1 pass
2478 and we store it in jump function during analysis stage. */
2480 if (!src_lats
->bits_lattice
.bottom_p ())
2483 = keep_null
&& !src_lats
->bits_lattice
.known_nonzero_p ();
2485 return dest_lattice
->meet_with (src_lats
->bits_lattice
, precision
,
2486 sgn
, code
, operand
, drop_all_ones
);
2490 value_range
vr (parm_type
);
2493 jfunc
->m_vr
->get_vrange (vr
);
2494 if (!vr
.undefined_p () && !vr
.varying_p ())
2496 irange_bitmask bm
= vr
.get_bitmask ();
2498 = widest_int::from (bm
.mask (), TYPE_SIGN (parm_type
));
2500 = widest_int::from (bm
.value (), TYPE_SIGN (parm_type
));
2501 return dest_lattice
->meet_with (value
, mask
, precision
);
2504 return dest_lattice
->set_to_bottom ();
2507 /* Propagate value range across jump function JFUNC that is associated with
2508 edge CS with param of callee of PARAM_TYPE and update DEST_PLATS
2512 propagate_vr_across_jump_function (cgraph_edge
*cs
, ipa_jump_func
*jfunc
,
2513 class ipcp_param_lattices
*dest_plats
,
2516 ipcp_vr_lattice
*dest_lat
= &dest_plats
->m_value_range
;
2518 if (dest_lat
->bottom_p ())
2522 || !ipa_vr_supported_type_p (param_type
))
2523 return dest_lat
->set_to_bottom ();
2525 if (jfunc
->type
== IPA_JF_PASS_THROUGH
)
2527 enum tree_code operation
= ipa_get_jf_pass_through_operation (jfunc
);
2528 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2529 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2530 class ipcp_param_lattices
*src_lats
2531 = ipa_get_parm_lattices (caller_info
, src_idx
);
2532 tree operand_type
= ipa_get_type (caller_info
, src_idx
);
2534 if (src_lats
->m_value_range
.bottom_p ())
2535 return dest_lat
->set_to_bottom ();
2537 value_range
vr (param_type
);
2538 if (TREE_CODE_CLASS (operation
) == tcc_unary
)
2539 ipa_vr_operation_and_type_effects (vr
,
2540 src_lats
->m_value_range
.m_vr
,
2541 operation
, param_type
,
2543 /* A crude way to prevent unbounded number of value range updates
2544 in SCC components. We should allow limited number of updates within
2546 else if (!ipa_edge_within_scc (cs
))
2548 tree op
= ipa_get_jf_pass_through_operand (jfunc
);
2549 value_range
op_vr (TREE_TYPE (op
));
2550 value_range
op_res (param_type
);
2551 range_op_handler
handler (operation
);
2553 ipa_range_set_and_normalize (op_vr
, op
);
2556 || !ipa_vr_supported_type_p (operand_type
)
2557 /* Sometimes we try to fold comparison operators using a
2558 pointer type to hold the result instead of a boolean
2559 type. Avoid trapping in the sanity check in
2560 fold_range until this is fixed. */
2561 || src_lats
->m_value_range
.m_vr
.undefined_p ()
2562 || op_vr
.undefined_p ()
2563 || !handler
.operand_check_p (operand_type
,
2564 src_lats
->m_value_range
.m_vr
.type (),
2566 || !handler
.fold_range (op_res
, operand_type
,
2567 src_lats
->m_value_range
.m_vr
, op_vr
))
2568 op_res
.set_varying (param_type
);
2570 ipa_vr_operation_and_type_effects (vr
,
2572 NOP_EXPR
, param_type
,
2575 if (!vr
.undefined_p () && !vr
.varying_p ())
2579 value_range
jvr (param_type
);
2580 if (ipa_vr_operation_and_type_effects (jvr
, *jfunc
->m_vr
,
2583 jfunc
->m_vr
->type ()))
2586 return dest_lat
->meet_with (vr
);
2589 else if (jfunc
->type
== IPA_JF_CONST
)
2591 tree val
= ipa_get_jf_constant (jfunc
);
2592 if (TREE_CODE (val
) == INTEGER_CST
)
2594 val
= fold_convert (param_type
, val
);
2595 if (TREE_OVERFLOW_P (val
))
2596 val
= drop_tree_overflow (val
);
2598 value_range
tmpvr (val
, val
);
2599 return dest_lat
->meet_with (tmpvr
);
2603 value_range
vr (param_type
);
2605 && ipa_vr_operation_and_type_effects (vr
, *jfunc
->m_vr
, NOP_EXPR
,
2607 jfunc
->m_vr
->type ()))
2608 return dest_lat
->meet_with (vr
);
2610 return dest_lat
->set_to_bottom ();
2613 /* If DEST_PLATS already has aggregate items, check that aggs_by_ref matches
2614 NEW_AGGS_BY_REF and if not, mark all aggs as bottoms and return true (in all
2615 other cases, return false). If there are no aggregate items, set
2616 aggs_by_ref to NEW_AGGS_BY_REF. */
2619 set_check_aggs_by_ref (class ipcp_param_lattices
*dest_plats
,
2620 bool new_aggs_by_ref
)
2622 if (dest_plats
->aggs
)
2624 if (dest_plats
->aggs_by_ref
!= new_aggs_by_ref
)
2626 set_agg_lats_to_bottom (dest_plats
);
2631 dest_plats
->aggs_by_ref
= new_aggs_by_ref
;
2635 /* Walk aggregate lattices in DEST_PLATS from ***AGLAT on, until ***aglat is an
2636 already existing lattice for the given OFFSET and SIZE, marking all skipped
2637 lattices as containing variable and checking for overlaps. If there is no
2638 already existing lattice for the OFFSET and VAL_SIZE, create one, initialize
2639 it with offset, size and contains_variable to PRE_EXISTING, and return true,
2640 unless there are too many already. If there are two many, return false. If
2641 there are overlaps turn whole DEST_PLATS to bottom and return false. If any
2642 skipped lattices were newly marked as containing variable, set *CHANGE to
2643 true. MAX_AGG_ITEMS is the maximum number of lattices. */
2646 merge_agg_lats_step (class ipcp_param_lattices
*dest_plats
,
2647 HOST_WIDE_INT offset
, HOST_WIDE_INT val_size
,
2648 struct ipcp_agg_lattice
***aglat
,
2649 bool pre_existing
, bool *change
, int max_agg_items
)
2651 gcc_checking_assert (offset
>= 0);
2653 while (**aglat
&& (**aglat
)->offset
< offset
)
2655 if ((**aglat
)->offset
+ (**aglat
)->size
> offset
)
2657 set_agg_lats_to_bottom (dest_plats
);
2660 *change
|= (**aglat
)->set_contains_variable ();
2661 *aglat
= &(**aglat
)->next
;
2664 if (**aglat
&& (**aglat
)->offset
== offset
)
2666 if ((**aglat
)->size
!= val_size
)
2668 set_agg_lats_to_bottom (dest_plats
);
2671 gcc_assert (!(**aglat
)->next
2672 || (**aglat
)->next
->offset
>= offset
+ val_size
);
2677 struct ipcp_agg_lattice
*new_al
;
2679 if (**aglat
&& (**aglat
)->offset
< offset
+ val_size
)
2681 set_agg_lats_to_bottom (dest_plats
);
2684 if (dest_plats
->aggs_count
== max_agg_items
)
2686 dest_plats
->aggs_count
++;
2687 new_al
= ipcp_agg_lattice_pool
.allocate ();
2689 new_al
->offset
= offset
;
2690 new_al
->size
= val_size
;
2691 new_al
->contains_variable
= pre_existing
;
2693 new_al
->next
= **aglat
;
2699 /* Set all AGLAT and all other aggregate lattices reachable by next pointers as
2700 containing an unknown value. */
2703 set_chain_of_aglats_contains_variable (struct ipcp_agg_lattice
*aglat
)
2708 ret
|= aglat
->set_contains_variable ();
2709 aglat
= aglat
->next
;
2714 /* Merge existing aggregate lattices in SRC_PLATS to DEST_PLATS, subtracting
2715 DELTA_OFFSET. CS is the call graph edge and SRC_IDX the index of the source
2716 parameter used for lattice value sources. Return true if DEST_PLATS changed
2720 merge_aggregate_lattices (struct cgraph_edge
*cs
,
2721 class ipcp_param_lattices
*dest_plats
,
2722 class ipcp_param_lattices
*src_plats
,
2723 int src_idx
, HOST_WIDE_INT offset_delta
)
2725 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2726 struct ipcp_agg_lattice
**dst_aglat
;
2729 if (set_check_aggs_by_ref (dest_plats
, src_plats
->aggs_by_ref
))
2731 if (src_plats
->aggs_bottom
)
2732 return set_agg_lats_contain_variable (dest_plats
);
2733 if (src_plats
->aggs_contain_variable
)
2734 ret
|= set_agg_lats_contain_variable (dest_plats
);
2735 dst_aglat
= &dest_plats
->aggs
;
2737 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2738 param_ipa_max_agg_items
);
2739 for (struct ipcp_agg_lattice
*src_aglat
= src_plats
->aggs
;
2741 src_aglat
= src_aglat
->next
)
2743 HOST_WIDE_INT new_offset
= src_aglat
->offset
- offset_delta
;
2747 if (merge_agg_lats_step (dest_plats
, new_offset
, src_aglat
->size
,
2748 &dst_aglat
, pre_existing
, &ret
, max_agg_items
))
2750 struct ipcp_agg_lattice
*new_al
= *dst_aglat
;
2752 dst_aglat
= &(*dst_aglat
)->next
;
2753 if (src_aglat
->bottom
)
2755 ret
|= new_al
->set_contains_variable ();
2758 if (src_aglat
->contains_variable
)
2759 ret
|= new_al
->set_contains_variable ();
2760 for (ipcp_value
<tree
> *val
= src_aglat
->values
;
2763 ret
|= new_al
->add_value (val
->value
, cs
, val
, src_idx
,
2766 else if (dest_plats
->aggs_bottom
)
2769 ret
|= set_chain_of_aglats_contains_variable (*dst_aglat
);
2773 /* Determine whether there is anything to propagate FROM SRC_PLATS through a
2774 pass-through JFUNC and if so, whether it has conform and conforms to the
2775 rules about propagating values passed by reference. */
2778 agg_pass_through_permissible_p (class ipcp_param_lattices
*src_plats
,
2779 struct ipa_jump_func
*jfunc
)
2781 return src_plats
->aggs
2782 && (!src_plats
->aggs_by_ref
2783 || ipa_get_jf_pass_through_agg_preserved (jfunc
));
2786 /* Propagate values through ITEM, jump function for a part of an aggregate,
2787 into corresponding aggregate lattice AGLAT. CS is the call graph edge
2788 associated with the jump function. Return true if AGLAT changed in any
2792 propagate_aggregate_lattice (struct cgraph_edge
*cs
,
2793 struct ipa_agg_jf_item
*item
,
2794 struct ipcp_agg_lattice
*aglat
)
2796 class ipa_node_params
*caller_info
;
2797 class ipcp_param_lattices
*src_plats
;
2798 struct ipcp_lattice
<tree
> *src_lat
;
2799 HOST_WIDE_INT src_offset
;
2804 if (item
->jftype
== IPA_JF_CONST
)
2806 tree value
= item
->value
.constant
;
2808 gcc_checking_assert (is_gimple_ip_invariant (value
));
2809 return aglat
->add_value (value
, cs
, NULL
, 0);
2812 gcc_checking_assert (item
->jftype
== IPA_JF_PASS_THROUGH
2813 || item
->jftype
== IPA_JF_LOAD_AGG
);
2815 caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2816 src_idx
= item
->value
.pass_through
.formal_id
;
2817 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2819 if (item
->jftype
== IPA_JF_PASS_THROUGH
)
2821 load_type
= NULL_TREE
;
2822 src_lat
= &src_plats
->itself
;
2827 HOST_WIDE_INT load_offset
= item
->value
.load_agg
.offset
;
2828 struct ipcp_agg_lattice
*src_aglat
;
2830 for (src_aglat
= src_plats
->aggs
; src_aglat
; src_aglat
= src_aglat
->next
)
2831 if (src_aglat
->offset
>= load_offset
)
2834 load_type
= item
->value
.load_agg
.type
;
2836 || src_aglat
->offset
> load_offset
2837 || src_aglat
->size
!= tree_to_shwi (TYPE_SIZE (load_type
))
2838 || src_plats
->aggs_by_ref
!= item
->value
.load_agg
.by_ref
)
2839 return aglat
->set_contains_variable ();
2841 src_lat
= src_aglat
;
2842 src_offset
= load_offset
;
2846 || (!ipcp_versionable_function_p (cs
->caller
)
2847 && !src_lat
->is_single_const ()))
2848 return aglat
->set_contains_variable ();
2850 ret
= propagate_vals_across_arith_jfunc (cs
,
2851 item
->value
.pass_through
.operation
,
2853 item
->value
.pass_through
.operand
,
2859 if (src_lat
->contains_variable
)
2860 ret
|= aglat
->set_contains_variable ();
2865 /* Propagate scalar values across jump function JFUNC that is associated with
2866 edge CS and put the values into DEST_LAT. */
2869 propagate_aggs_across_jump_function (struct cgraph_edge
*cs
,
2870 struct ipa_jump_func
*jfunc
,
2871 class ipcp_param_lattices
*dest_plats
)
2875 if (dest_plats
->aggs_bottom
)
2878 if (jfunc
->type
== IPA_JF_PASS_THROUGH
2879 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
2881 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2882 int src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
2883 class ipcp_param_lattices
*src_plats
;
2885 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2886 if (agg_pass_through_permissible_p (src_plats
, jfunc
))
2888 /* Currently we do not produce clobber aggregate jump
2889 functions, replace with merging when we do. */
2890 gcc_assert (!jfunc
->agg
.items
);
2891 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
,
2896 else if (jfunc
->type
== IPA_JF_ANCESTOR
2897 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
2899 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
2900 int src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
2901 class ipcp_param_lattices
*src_plats
;
2903 src_plats
= ipa_get_parm_lattices (caller_info
, src_idx
);
2904 if (src_plats
->aggs
&& src_plats
->aggs_by_ref
)
2906 /* Currently we do not produce clobber aggregate jump
2907 functions, replace with merging when we do. */
2908 gcc_assert (!jfunc
->agg
.items
);
2909 ret
|= merge_aggregate_lattices (cs
, dest_plats
, src_plats
, src_idx
,
2910 ipa_get_jf_ancestor_offset (jfunc
));
2912 else if (!src_plats
->aggs_by_ref
)
2913 ret
|= set_agg_lats_to_bottom (dest_plats
);
2915 ret
|= set_agg_lats_contain_variable (dest_plats
);
2919 if (jfunc
->agg
.items
)
2921 bool pre_existing
= dest_plats
->aggs
!= NULL
;
2922 struct ipcp_agg_lattice
**aglat
= &dest_plats
->aggs
;
2923 struct ipa_agg_jf_item
*item
;
2926 if (set_check_aggs_by_ref (dest_plats
, jfunc
->agg
.by_ref
))
2929 int max_agg_items
= opt_for_fn (cs
->callee
->function_symbol ()->decl
,
2930 param_ipa_max_agg_items
);
2931 FOR_EACH_VEC_ELT (*jfunc
->agg
.items
, i
, item
)
2933 HOST_WIDE_INT val_size
;
2935 if (item
->offset
< 0 || item
->jftype
== IPA_JF_UNKNOWN
)
2937 val_size
= tree_to_shwi (TYPE_SIZE (item
->type
));
2939 if (merge_agg_lats_step (dest_plats
, item
->offset
, val_size
,
2940 &aglat
, pre_existing
, &ret
, max_agg_items
))
2942 ret
|= propagate_aggregate_lattice (cs
, item
, *aglat
);
2943 aglat
= &(*aglat
)->next
;
2945 else if (dest_plats
->aggs_bottom
)
2949 ret
|= set_chain_of_aglats_contains_variable (*aglat
);
2952 ret
|= set_agg_lats_contain_variable (dest_plats
);
2957 /* Return true if on the way cfrom CS->caller to the final (non-alias and
2958 non-thunk) destination, the call passes through a thunk. */
2961 call_passes_through_thunk (cgraph_edge
*cs
)
2963 cgraph_node
*alias_or_thunk
= cs
->callee
;
2964 while (alias_or_thunk
->alias
)
2965 alias_or_thunk
= alias_or_thunk
->get_alias_target ();
2966 return alias_or_thunk
->thunk
;
2969 /* Propagate constants from the caller to the callee of CS. INFO describes the
2973 propagate_constants_across_call (struct cgraph_edge
*cs
)
2975 class ipa_node_params
*callee_info
;
2976 enum availability availability
;
2977 cgraph_node
*callee
;
2978 class ipa_edge_args
*args
;
2980 int i
, args_count
, parms_count
;
2982 callee
= cs
->callee
->function_symbol (&availability
);
2983 if (!callee
->definition
)
2985 gcc_checking_assert (callee
->has_gimple_body_p ());
2986 callee_info
= ipa_node_params_sum
->get (callee
);
2990 args
= ipa_edge_args_sum
->get (cs
);
2991 parms_count
= ipa_get_param_count (callee_info
);
2992 if (parms_count
== 0)
2995 || !opt_for_fn (cs
->caller
->decl
, flag_ipa_cp
)
2996 || !opt_for_fn (cs
->caller
->decl
, optimize
))
2998 for (i
= 0; i
< parms_count
; i
++)
2999 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
3003 args_count
= ipa_get_cs_argument_count (args
);
3005 /* If this call goes through a thunk we must not propagate to the first (0th)
3006 parameter. However, we might need to uncover a thunk from below a series
3007 of aliases first. */
3008 if (call_passes_through_thunk (cs
))
3010 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
,
3017 for (; (i
< args_count
) && (i
< parms_count
); i
++)
3019 struct ipa_jump_func
*jump_func
= ipa_get_ith_jump_func (args
, i
);
3020 class ipcp_param_lattices
*dest_plats
;
3021 tree param_type
= ipa_get_type (callee_info
, i
);
3023 dest_plats
= ipa_get_parm_lattices (callee_info
, i
);
3024 if (availability
== AVAIL_INTERPOSABLE
)
3025 ret
|= set_all_contains_variable (dest_plats
);
3028 ret
|= propagate_scalar_across_jump_function (cs
, jump_func
,
3029 &dest_plats
->itself
,
3031 ret
|= propagate_context_across_jump_function (cs
, jump_func
, i
,
3032 &dest_plats
->ctxlat
);
3034 |= propagate_bits_across_jump_function (cs
, i
, jump_func
,
3035 &dest_plats
->bits_lattice
);
3036 ret
|= propagate_aggs_across_jump_function (cs
, jump_func
,
3038 if (opt_for_fn (callee
->decl
, flag_ipa_vrp
))
3039 ret
|= propagate_vr_across_jump_function (cs
, jump_func
,
3040 dest_plats
, param_type
);
3042 ret
|= dest_plats
->m_value_range
.set_to_bottom ();
3045 for (; i
< parms_count
; i
++)
3046 ret
|= set_all_contains_variable (ipa_get_parm_lattices (callee_info
, i
));
3051 /* If an indirect edge IE can be turned into a direct one based on KNOWN_VALS
3052 KNOWN_CONTEXTS, and known aggregates either in AVS or KNOWN_AGGS return
3053 the destination. The latter three can be NULL. If AGG_REPS is not NULL,
3054 KNOWN_AGGS is ignored. */
3057 ipa_get_indirect_edge_target_1 (struct cgraph_edge
*ie
,
3058 const vec
<tree
> &known_csts
,
3059 const vec
<ipa_polymorphic_call_context
> &known_contexts
,
3060 const ipa_argagg_value_list
&avs
,
3063 int param_index
= ie
->indirect_info
->param_index
;
3064 HOST_WIDE_INT anc_offset
;
3068 *speculative
= false;
3070 if (param_index
== -1)
3073 if (!ie
->indirect_info
->polymorphic
)
3077 if (ie
->indirect_info
->agg_contents
)
3080 if ((unsigned) param_index
< known_csts
.length ()
3081 && known_csts
[param_index
])
3082 t
= ipa_find_agg_cst_from_init (known_csts
[param_index
],
3083 ie
->indirect_info
->offset
,
3084 ie
->indirect_info
->by_ref
);
3086 if (!t
&& ie
->indirect_info
->guaranteed_unmodified
)
3087 t
= avs
.get_value (param_index
,
3088 ie
->indirect_info
->offset
/ BITS_PER_UNIT
,
3089 ie
->indirect_info
->by_ref
);
3091 else if ((unsigned) param_index
< known_csts
.length ())
3092 t
= known_csts
[param_index
];
3095 && TREE_CODE (t
) == ADDR_EXPR
3096 && TREE_CODE (TREE_OPERAND (t
, 0)) == FUNCTION_DECL
)
3097 return TREE_OPERAND (t
, 0);
3102 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize
))
3105 gcc_assert (!ie
->indirect_info
->agg_contents
);
3106 gcc_assert (!ie
->indirect_info
->by_ref
);
3107 anc_offset
= ie
->indirect_info
->offset
;
3111 if ((unsigned) param_index
< known_csts
.length ()
3112 && known_csts
[param_index
])
3113 t
= ipa_find_agg_cst_from_init (known_csts
[param_index
],
3114 ie
->indirect_info
->offset
, true);
3116 /* Try to work out value of virtual table pointer value in replacements. */
3117 /* or known aggregate values. */
3119 t
= avs
.get_value (param_index
,
3120 ie
->indirect_info
->offset
/ BITS_PER_UNIT
,
3123 /* If we found the virtual table pointer, lookup the target. */
3127 unsigned HOST_WIDE_INT offset
;
3128 if (vtable_pointer_value_to_vtable (t
, &vtable
, &offset
))
3131 target
= gimple_get_virt_method_for_vtable (ie
->indirect_info
->otr_token
,
3132 vtable
, offset
, &can_refer
);
3136 || fndecl_built_in_p (target
, BUILT_IN_UNREACHABLE
)
3137 || !possible_polymorphic_call_target_p
3138 (ie
, cgraph_node::get (target
)))
3140 /* Do not speculate builtin_unreachable, it is stupid! */
3141 if (ie
->indirect_info
->vptr_changed
)
3143 target
= ipa_impossible_devirt_target (ie
, target
);
3145 *speculative
= ie
->indirect_info
->vptr_changed
;
3152 /* Do we know the constant value of pointer? */
3153 if (!t
&& (unsigned) param_index
< known_csts
.length ())
3154 t
= known_csts
[param_index
];
3156 gcc_checking_assert (!t
|| TREE_CODE (t
) != TREE_BINFO
);
3158 ipa_polymorphic_call_context context
;
3159 if (known_contexts
.length () > (unsigned int) param_index
)
3161 context
= known_contexts
[param_index
];
3162 context
.offset_by (anc_offset
);
3163 if (ie
->indirect_info
->vptr_changed
)
3164 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3165 ie
->indirect_info
->otr_type
);
3168 ipa_polymorphic_call_context ctx2
= ipa_polymorphic_call_context
3169 (t
, ie
->indirect_info
->otr_type
, anc_offset
);
3170 if (!ctx2
.useless_p ())
3171 context
.combine_with (ctx2
, ie
->indirect_info
->otr_type
);
3176 context
= ipa_polymorphic_call_context (t
, ie
->indirect_info
->otr_type
,
3178 if (ie
->indirect_info
->vptr_changed
)
3179 context
.possible_dynamic_type_change (ie
->in_polymorphic_cdtor
,
3180 ie
->indirect_info
->otr_type
);
3185 vec
<cgraph_node
*>targets
;
3188 targets
= possible_polymorphic_call_targets
3189 (ie
->indirect_info
->otr_type
,
3190 ie
->indirect_info
->otr_token
,
3192 if (!final
|| targets
.length () > 1)
3194 struct cgraph_node
*node
;
3197 if (!opt_for_fn (ie
->caller
->decl
, flag_devirtualize_speculatively
)
3198 || ie
->speculative
|| !ie
->maybe_hot_p ())
3200 node
= try_speculative_devirtualization (ie
->indirect_info
->otr_type
,
3201 ie
->indirect_info
->otr_token
,
3205 *speculative
= true;
3206 target
= node
->decl
;
3213 *speculative
= false;
3214 if (targets
.length () == 1)
3215 target
= targets
[0]->decl
;
3217 target
= ipa_impossible_devirt_target (ie
, NULL_TREE
);
3220 if (target
&& !possible_polymorphic_call_target_p (ie
,
3221 cgraph_node::get (target
)))
3225 target
= ipa_impossible_devirt_target (ie
, target
);
3231 /* If an indirect edge IE can be turned into a direct one based on data in
3232 AVALS, return the destination. Store into *SPECULATIVE a boolean determinig
3233 whether the discovered target is only speculative guess. */
3236 ipa_get_indirect_edge_target (struct cgraph_edge
*ie
,
3237 ipa_call_arg_values
*avals
,
3240 ipa_argagg_value_list
avl (avals
);
3241 return ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3242 avals
->m_known_contexts
,
3246 /* Calculate devirtualization time bonus for NODE, assuming we know information
3247 about arguments stored in AVALS. */
3250 devirtualization_time_bonus (struct cgraph_node
*node
,
3251 ipa_auto_call_arg_values
*avals
)
3253 struct cgraph_edge
*ie
;
3256 for (ie
= node
->indirect_calls
; ie
; ie
= ie
->next_callee
)
3258 struct cgraph_node
*callee
;
3259 class ipa_fn_summary
*isummary
;
3260 enum availability avail
;
3264 ipa_argagg_value_list
avl (avals
);
3265 target
= ipa_get_indirect_edge_target_1 (ie
, avals
->m_known_vals
,
3266 avals
->m_known_contexts
,
3271 /* Only bare minimum benefit for clearly un-inlineable targets. */
3273 callee
= cgraph_node::get (target
);
3274 if (!callee
|| !callee
->definition
)
3276 callee
= callee
->function_symbol (&avail
);
3277 if (avail
< AVAIL_AVAILABLE
)
3279 isummary
= ipa_fn_summaries
->get (callee
);
3280 if (!isummary
|| !isummary
->inlinable
)
3283 int size
= ipa_size_summaries
->get (callee
)->size
;
3284 /* FIXME: The values below need re-considering and perhaps also
3285 integrating into the cost metrics, at lest in some very basic way. */
3286 int max_inline_insns_auto
3287 = opt_for_fn (callee
->decl
, param_max_inline_insns_auto
);
3288 if (size
<= max_inline_insns_auto
/ 4)
3289 res
+= 31 / ((int)speculative
+ 1);
3290 else if (size
<= max_inline_insns_auto
/ 2)
3291 res
+= 15 / ((int)speculative
+ 1);
3292 else if (size
<= max_inline_insns_auto
3293 || DECL_DECLARED_INLINE_P (callee
->decl
))
3294 res
+= 7 / ((int)speculative
+ 1);
3300 /* Return time bonus incurred because of hints stored in ESTIMATES. */
3303 hint_time_bonus (cgraph_node
*node
, const ipa_call_estimates
&estimates
)
3306 ipa_hints hints
= estimates
.hints
;
3307 if (hints
& (INLINE_HINT_loop_iterations
| INLINE_HINT_loop_stride
))
3308 result
+= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3310 sreal bonus_for_one
= opt_for_fn (node
->decl
, param_ipa_cp_loop_hint_bonus
);
3312 if (hints
& INLINE_HINT_loop_iterations
)
3313 result
+= (estimates
.loops_with_known_iterations
* bonus_for_one
).to_int ();
3315 if (hints
& INLINE_HINT_loop_stride
)
3316 result
+= (estimates
.loops_with_known_strides
* bonus_for_one
).to_int ();
3321 /* If there is a reason to penalize the function described by INFO in the
3322 cloning goodness evaluation, do so. */
3325 incorporate_penalties (cgraph_node
*node
, ipa_node_params
*info
,
3328 if (info
->node_within_scc
&& !info
->node_is_self_scc
)
3329 evaluation
= (evaluation
3330 * (100 - opt_for_fn (node
->decl
,
3331 param_ipa_cp_recursion_penalty
))) / 100;
3333 if (info
->node_calling_single_call
)
3334 evaluation
= (evaluation
3335 * (100 - opt_for_fn (node
->decl
,
3336 param_ipa_cp_single_call_penalty
)))
3342 /* Return true if cloning NODE is a good idea, given the estimated TIME_BENEFIT
3343 and SIZE_COST and with the sum of frequencies of incoming edges to the
3344 potential new clone in FREQUENCIES. */
3347 good_cloning_opportunity_p (struct cgraph_node
*node
, sreal time_benefit
,
3348 sreal freq_sum
, profile_count count_sum
,
3351 if (time_benefit
== 0
3352 || !opt_for_fn (node
->decl
, flag_ipa_cp_clone
)
3353 || node
->optimize_for_size_p ())
3356 gcc_assert (size_cost
> 0);
3358 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3359 int eval_threshold
= opt_for_fn (node
->decl
, param_ipa_cp_eval_threshold
);
3360 if (count_sum
.nonzero_p ())
3362 gcc_assert (base_count
.nonzero_p ());
3363 sreal factor
= count_sum
.probability_in (base_count
).to_sreal ();
3364 sreal evaluation
= (time_benefit
* factor
) / size_cost
;
3365 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3368 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3370 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3371 "size: %i, count_sum: ", time_benefit
.to_double (),
3373 count_sum
.dump (dump_file
);
3374 fprintf (dump_file
, "%s%s) -> evaluation: %.2f, threshold: %i\n",
3375 info
->node_within_scc
3376 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3377 info
->node_calling_single_call
? ", single_call" : "",
3378 evaluation
.to_double (), eval_threshold
);
3381 return evaluation
.to_int () >= eval_threshold
;
3385 sreal evaluation
= (time_benefit
* freq_sum
) / size_cost
;
3386 evaluation
= incorporate_penalties (node
, info
, evaluation
);
3389 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3390 fprintf (dump_file
, " good_cloning_opportunity_p (time: %g, "
3391 "size: %i, freq_sum: %g%s%s) -> evaluation: %.2f, "
3393 time_benefit
.to_double (), size_cost
, freq_sum
.to_double (),
3394 info
->node_within_scc
3395 ? (info
->node_is_self_scc
? ", self_scc" : ", scc") : "",
3396 info
->node_calling_single_call
? ", single_call" : "",
3397 evaluation
.to_double (), eval_threshold
);
3399 return evaluation
.to_int () >= eval_threshold
;
3403 /* Grow vectors in AVALS and fill them with information about values of
3404 parameters that are known to be independent of the context. Only calculate
3405 m_known_aggs if CALCULATE_AGGS is true. INFO describes the function. If
3406 REMOVABLE_PARAMS_COST is non-NULL, the movement cost of all removable
3407 parameters will be stored in it.
3409 TODO: Also grow context independent value range vectors. */
3412 gather_context_independent_values (class ipa_node_params
*info
,
3413 ipa_auto_call_arg_values
*avals
,
3414 bool calculate_aggs
,
3415 int *removable_params_cost
)
3417 int i
, count
= ipa_get_param_count (info
);
3420 avals
->m_known_vals
.safe_grow_cleared (count
, true);
3421 avals
->m_known_contexts
.safe_grow_cleared (count
, true);
3423 if (removable_params_cost
)
3424 *removable_params_cost
= 0;
3426 for (i
= 0; i
< count
; i
++)
3428 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3429 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3431 if (lat
->is_single_const ())
3433 ipcp_value
<tree
> *val
= lat
->values
;
3434 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3435 avals
->m_known_vals
[i
] = val
->value
;
3436 if (removable_params_cost
)
3437 *removable_params_cost
3438 += estimate_move_cost (TREE_TYPE (val
->value
), false);
3441 else if (removable_params_cost
3442 && !ipa_is_param_used (info
, i
))
3443 *removable_params_cost
3444 += ipa_get_param_move_cost (info
, i
);
3446 if (!ipa_is_param_used (info
, i
))
3449 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3450 /* Do not account known context as reason for cloning. We can see
3451 if it permits devirtualization. */
3452 if (ctxlat
->is_single_const ())
3453 avals
->m_known_contexts
[i
] = ctxlat
->values
->value
;
3456 ret
|= push_agg_values_from_plats (plats
, i
, 0, &avals
->m_known_aggs
);
3462 /* Perform time and size measurement of NODE with the context given in AVALS,
3463 calculate the benefit compared to the node without specialization and store
3464 it into VAL. Take into account REMOVABLE_PARAMS_COST of all
3465 context-independent or unused removable parameters and EST_MOVE_COST, the
3466 estimated movement of the considered parameter. */
3469 perform_estimation_of_a_value (cgraph_node
*node
,
3470 ipa_auto_call_arg_values
*avals
,
3471 int removable_params_cost
, int est_move_cost
,
3472 ipcp_value_base
*val
)
3475 ipa_call_estimates estimates
;
3477 estimate_ipcp_clone_size_and_time (node
, avals
, &estimates
);
3479 /* Extern inline functions have no cloning local time benefits because they
3480 will be inlined anyway. The only reason to clone them is if it enables
3481 optimization in any of the functions they call. */
3482 if (DECL_EXTERNAL (node
->decl
) && DECL_DECLARED_INLINE_P (node
->decl
))
3485 time_benefit
= (estimates
.nonspecialized_time
- estimates
.time
)
3486 + (devirtualization_time_bonus (node
, avals
)
3487 + hint_time_bonus (node
, estimates
)
3488 + removable_params_cost
+ est_move_cost
);
3490 int size
= estimates
.size
;
3491 gcc_checking_assert (size
>=0);
3492 /* The inliner-heuristics based estimates may think that in certain
3493 contexts some functions do not have any size at all but we want
3494 all specializations to have at least a tiny cost, not least not to
3499 val
->local_time_benefit
= time_benefit
;
3500 val
->local_size_cost
= size
;
3503 /* Get the overall limit oof growth based on parameters extracted from growth.
3504 it does not really make sense to mix functions with different overall growth
3505 limits but it is possible and if it happens, we do not want to select one
3509 get_max_overall_size (cgraph_node
*node
)
3511 long max_new_size
= orig_overall_size
;
3512 long large_unit
= opt_for_fn (node
->decl
, param_ipa_cp_large_unit_insns
);
3513 if (max_new_size
< large_unit
)
3514 max_new_size
= large_unit
;
3515 int unit_growth
= opt_for_fn (node
->decl
, param_ipa_cp_unit_growth
);
3516 max_new_size
+= max_new_size
* unit_growth
/ 100 + 1;
3517 return max_new_size
;
3520 /* Return true if NODE should be cloned just for a parameter removal, possibly
3521 dumping a reason if not. */
3524 clone_for_param_removal_p (cgraph_node
*node
)
3526 if (!node
->can_change_signature
)
3528 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3529 fprintf (dump_file
, " Not considering cloning to remove parameters, "
3530 "function cannot change signature.\n");
3533 if (node
->can_be_local_p ())
3535 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3536 fprintf (dump_file
, " Not considering cloning to remove parameters, "
3537 "IPA-SRA can do it potentially better.\n");
3543 /* Iterate over known values of parameters of NODE and estimate the local
3544 effects in terms of time and size they have. */
3547 estimate_local_effects (struct cgraph_node
*node
)
3549 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3550 int count
= ipa_get_param_count (info
);
3552 int removable_params_cost
;
3554 if (!count
|| !ipcp_versionable_function_p (node
))
3557 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3558 fprintf (dump_file
, "\nEstimating effects for %s.\n", node
->dump_name ());
3560 ipa_auto_call_arg_values avals
;
3561 always_const
= gather_context_independent_values (info
, &avals
, true,
3562 &removable_params_cost
);
3563 int devirt_bonus
= devirtualization_time_bonus (node
, &avals
);
3564 if (always_const
|| devirt_bonus
3565 || (removable_params_cost
&& clone_for_param_removal_p (node
)))
3567 struct caller_statistics stats
;
3568 ipa_call_estimates estimates
;
3570 init_caller_stats (&stats
);
3571 node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
3573 estimate_ipcp_clone_size_and_time (node
, &avals
, &estimates
);
3574 sreal time
= estimates
.nonspecialized_time
- estimates
.time
;
3575 time
+= devirt_bonus
;
3576 time
+= hint_time_bonus (node
, estimates
);
3577 time
+= removable_params_cost
;
3578 int size
= estimates
.size
- stats
.n_calls
* removable_params_cost
;
3581 fprintf (dump_file
, " - context independent values, size: %i, "
3582 "time_benefit: %f\n", size
, (time
).to_double ());
3584 if (size
<= 0 || node
->local
)
3586 info
->do_clone_for_all_contexts
= true;
3589 fprintf (dump_file
, " Decided to specialize for all "
3590 "known contexts, code not going to grow.\n");
3592 else if (good_cloning_opportunity_p (node
, time
, stats
.freq_sum
,
3593 stats
.count_sum
, size
))
3595 if (size
+ overall_size
<= get_max_overall_size (node
))
3597 info
->do_clone_for_all_contexts
= true;
3598 overall_size
+= size
;
3601 fprintf (dump_file
, " Decided to specialize for all "
3602 "known contexts, growth (to %li) deemed "
3603 "beneficial.\n", overall_size
);
3605 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3606 fprintf (dump_file
, " Not cloning for all contexts because "
3607 "maximum unit size would be reached with %li.\n",
3608 size
+ overall_size
);
3610 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3611 fprintf (dump_file
, " Not cloning for all contexts because "
3612 "!good_cloning_opportunity_p.\n");
3616 for (int i
= 0; i
< count
; i
++)
3618 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3619 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3620 ipcp_value
<tree
> *val
;
3624 || avals
.m_known_vals
[i
])
3627 for (val
= lat
->values
; val
; val
= val
->next
)
3629 gcc_checking_assert (TREE_CODE (val
->value
) != TREE_BINFO
);
3630 avals
.m_known_vals
[i
] = val
->value
;
3632 int emc
= estimate_move_cost (TREE_TYPE (val
->value
), true);
3633 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3636 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3638 fprintf (dump_file
, " - estimates for value ");
3639 print_ipcp_constant_value (dump_file
, val
->value
);
3640 fprintf (dump_file
, " for ");
3641 ipa_dump_param (dump_file
, info
, i
);
3642 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3643 val
->local_time_benefit
.to_double (),
3644 val
->local_size_cost
);
3647 avals
.m_known_vals
[i
] = NULL_TREE
;
3650 for (int i
= 0; i
< count
; i
++)
3652 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3654 if (!plats
->virt_call
)
3657 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3658 ipcp_value
<ipa_polymorphic_call_context
> *val
;
3662 || !avals
.m_known_contexts
[i
].useless_p ())
3665 for (val
= ctxlat
->values
; val
; val
= val
->next
)
3667 avals
.m_known_contexts
[i
] = val
->value
;
3668 perform_estimation_of_a_value (node
, &avals
, removable_params_cost
,
3671 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3673 fprintf (dump_file
, " - estimates for polymorphic context ");
3674 print_ipcp_constant_value (dump_file
, val
->value
);
3675 fprintf (dump_file
, " for ");
3676 ipa_dump_param (dump_file
, info
, i
);
3677 fprintf (dump_file
, ": time_benefit: %g, size: %i\n",
3678 val
->local_time_benefit
.to_double (),
3679 val
->local_size_cost
);
3682 avals
.m_known_contexts
[i
] = ipa_polymorphic_call_context ();
3685 unsigned all_ctx_len
= avals
.m_known_aggs
.length ();
3686 auto_vec
<ipa_argagg_value
, 32> all_ctx
;
3687 all_ctx
.reserve_exact (all_ctx_len
);
3688 all_ctx
.splice (avals
.m_known_aggs
);
3689 avals
.m_known_aggs
.safe_grow_cleared (all_ctx_len
+ 1);
3692 for (int index
= 0; index
< count
; index
++)
3694 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, index
);
3696 if (plats
->aggs_bottom
|| !plats
->aggs
)
3699 for (ipcp_agg_lattice
*aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3701 ipcp_value
<tree
> *val
;
3702 if (aglat
->bottom
|| !aglat
->values
3703 /* If the following is true, the one value is already part of all
3704 context estimations. */
3705 || (!plats
->aggs_contain_variable
3706 && aglat
->is_single_const ()))
3709 unsigned unit_offset
= aglat
->offset
/ BITS_PER_UNIT
;
3710 while (j
< all_ctx_len
3711 && (all_ctx
[j
].index
< index
3712 || (all_ctx
[j
].index
== index
3713 && all_ctx
[j
].unit_offset
< unit_offset
)))
3715 avals
.m_known_aggs
[j
] = all_ctx
[j
];
3719 for (unsigned k
= j
; k
< all_ctx_len
; k
++)
3720 avals
.m_known_aggs
[k
+1] = all_ctx
[k
];
3722 for (val
= aglat
->values
; val
; val
= val
->next
)
3724 avals
.m_known_aggs
[j
].value
= val
->value
;
3725 avals
.m_known_aggs
[j
].unit_offset
= unit_offset
;
3726 avals
.m_known_aggs
[j
].index
= index
;
3727 avals
.m_known_aggs
[j
].by_ref
= plats
->aggs_by_ref
;
3728 avals
.m_known_aggs
[j
].killed
= false;
3730 perform_estimation_of_a_value (node
, &avals
,
3731 removable_params_cost
, 0, val
);
3733 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3735 fprintf (dump_file
, " - estimates for value ");
3736 print_ipcp_constant_value (dump_file
, val
->value
);
3737 fprintf (dump_file
, " for ");
3738 ipa_dump_param (dump_file
, info
, index
);
3739 fprintf (dump_file
, "[%soffset: " HOST_WIDE_INT_PRINT_DEC
3740 "]: time_benefit: %g, size: %i\n",
3741 plats
->aggs_by_ref
? "ref " : "",
3743 val
->local_time_benefit
.to_double (),
3744 val
->local_size_cost
);
3752 /* Add value CUR_VAL and all yet-unsorted values it is dependent on to the
3753 topological sort of values. */
3755 template <typename valtype
>
3757 value_topo_info
<valtype
>::add_val (ipcp_value
<valtype
> *cur_val
)
3759 ipcp_value_source
<valtype
> *src
;
3765 cur_val
->dfs
= dfs_counter
;
3766 cur_val
->low_link
= dfs_counter
;
3768 cur_val
->topo_next
= stack
;
3770 cur_val
->on_stack
= true;
3772 for (src
= cur_val
->sources
; src
; src
= src
->next
)
3775 if (src
->val
->dfs
== 0)
3778 if (src
->val
->low_link
< cur_val
->low_link
)
3779 cur_val
->low_link
= src
->val
->low_link
;
3781 else if (src
->val
->on_stack
3782 && src
->val
->dfs
< cur_val
->low_link
)
3783 cur_val
->low_link
= src
->val
->dfs
;
3786 if (cur_val
->dfs
== cur_val
->low_link
)
3788 ipcp_value
<valtype
> *v
, *scc_list
= NULL
;
3793 stack
= v
->topo_next
;
3794 v
->on_stack
= false;
3795 v
->scc_no
= cur_val
->dfs
;
3797 v
->scc_next
= scc_list
;
3800 while (v
!= cur_val
);
3802 cur_val
->topo_next
= values_topo
;
3803 values_topo
= cur_val
;
3807 /* Add all values in lattices associated with NODE to the topological sort if
3808 they are not there yet. */
3811 add_all_node_vals_to_toposort (cgraph_node
*node
, ipa_topo_info
*topo
)
3813 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
3814 int i
, count
= ipa_get_param_count (info
);
3816 for (i
= 0; i
< count
; i
++)
3818 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
3819 ipcp_lattice
<tree
> *lat
= &plats
->itself
;
3820 struct ipcp_agg_lattice
*aglat
;
3824 ipcp_value
<tree
> *val
;
3825 for (val
= lat
->values
; val
; val
= val
->next
)
3826 topo
->constants
.add_val (val
);
3829 if (!plats
->aggs_bottom
)
3830 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
3833 ipcp_value
<tree
> *val
;
3834 for (val
= aglat
->values
; val
; val
= val
->next
)
3835 topo
->constants
.add_val (val
);
3838 ipcp_lattice
<ipa_polymorphic_call_context
> *ctxlat
= &plats
->ctxlat
;
3839 if (!ctxlat
->bottom
)
3841 ipcp_value
<ipa_polymorphic_call_context
> *ctxval
;
3842 for (ctxval
= ctxlat
->values
; ctxval
; ctxval
= ctxval
->next
)
3843 topo
->contexts
.add_val (ctxval
);
3848 /* One pass of constants propagation along the call graph edges, from callers
3849 to callees (requires topological ordering in TOPO), iterate over strongly
3850 connected components. */
3853 propagate_constants_topo (class ipa_topo_info
*topo
)
3857 for (i
= topo
->nnodes
- 1; i
>= 0; i
--)
3860 struct cgraph_node
*v
, *node
= topo
->order
[i
];
3861 vec
<cgraph_node
*> cycle_nodes
= ipa_get_nodes_in_cycle (node
);
3863 /* First, iteratively propagate within the strongly connected component
3864 until all lattices stabilize. */
3865 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3866 if (v
->has_gimple_body_p ())
3868 if (opt_for_fn (v
->decl
, flag_ipa_cp
)
3869 && opt_for_fn (v
->decl
, optimize
))
3870 push_node_to_stack (topo
, v
);
3871 /* When V is not optimized, we can not push it to stack, but
3872 still we need to set all its callees lattices to bottom. */
3875 for (cgraph_edge
*cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3876 propagate_constants_across_call (cs
);
3880 v
= pop_node_from_stack (topo
);
3883 struct cgraph_edge
*cs
;
3884 class ipa_node_params
*info
= NULL
;
3885 bool self_scc
= true;
3887 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3888 if (ipa_edge_within_scc (cs
))
3890 cgraph_node
*callee
= cs
->callee
->function_symbol ();
3897 info
= ipa_node_params_sum
->get (v
);
3898 info
->node_within_scc
= true;
3901 if (propagate_constants_across_call (cs
))
3902 push_node_to_stack (topo
, callee
);
3906 info
->node_is_self_scc
= self_scc
;
3908 v
= pop_node_from_stack (topo
);
3911 /* Afterwards, propagate along edges leading out of the SCC, calculates
3912 the local effects of the discovered constants and all valid values to
3913 their topological sort. */
3914 FOR_EACH_VEC_ELT (cycle_nodes
, j
, v
)
3915 if (v
->has_gimple_body_p ()
3916 && opt_for_fn (v
->decl
, flag_ipa_cp
)
3917 && opt_for_fn (v
->decl
, optimize
))
3919 struct cgraph_edge
*cs
;
3921 estimate_local_effects (v
);
3922 add_all_node_vals_to_toposort (v
, topo
);
3923 for (cs
= v
->callees
; cs
; cs
= cs
->next_callee
)
3924 if (!ipa_edge_within_scc (cs
))
3925 propagate_constants_across_call (cs
);
3927 cycle_nodes
.release ();
3931 /* Propagate the estimated effects of individual values along the topological
3932 from the dependent values to those they depend on. */
3934 template <typename valtype
>
3936 value_topo_info
<valtype
>::propagate_effects ()
3938 ipcp_value
<valtype
> *base
;
3939 hash_set
<ipcp_value
<valtype
> *> processed_srcvals
;
3941 for (base
= values_topo
; base
; base
= base
->topo_next
)
3943 ipcp_value_source
<valtype
> *src
;
3944 ipcp_value
<valtype
> *val
;
3946 HOST_WIDE_INT size
= 0;
3948 for (val
= base
; val
; val
= val
->scc_next
)
3950 time
= time
+ val
->local_time_benefit
+ val
->prop_time_benefit
;
3951 size
= size
+ val
->local_size_cost
+ val
->prop_size_cost
;
3954 for (val
= base
; val
; val
= val
->scc_next
)
3956 processed_srcvals
.empty ();
3957 for (src
= val
->sources
; src
; src
= src
->next
)
3959 && src
->cs
->maybe_hot_p ())
3961 if (!processed_srcvals
.add (src
->val
))
3963 HOST_WIDE_INT prop_size
= size
+ src
->val
->prop_size_cost
;
3964 if (prop_size
< INT_MAX
)
3965 src
->val
->prop_size_cost
= prop_size
;
3970 int special_factor
= 1;
3971 if (val
->same_scc (src
->val
))
3973 = opt_for_fn(src
->cs
->caller
->decl
,
3974 param_ipa_cp_recursive_freq_factor
);
3975 else if (val
->self_recursion_generated_p ()
3976 && (src
->cs
->callee
->function_symbol ()
3977 == src
->cs
->caller
))
3979 int max_recur_gen_depth
3980 = opt_for_fn(src
->cs
->caller
->decl
,
3981 param_ipa_cp_max_recursive_depth
);
3982 special_factor
= max_recur_gen_depth
3983 - val
->self_recursion_generated_level
+ 1;
3986 src
->val
->prop_time_benefit
3987 += time
* special_factor
* src
->cs
->sreal_frequency ();
3992 val
->prop_time_benefit
= time
;
3993 val
->prop_size_cost
= size
;
3997 val
->prop_time_benefit
= 0;
3998 val
->prop_size_cost
= 0;
4004 /* Callback for qsort to sort counts of all edges. */
4007 compare_edge_profile_counts (const void *a
, const void *b
)
4009 const profile_count
*cnt1
= (const profile_count
*) a
;
4010 const profile_count
*cnt2
= (const profile_count
*) b
;
4020 /* Propagate constants, polymorphic contexts and their effects from the
4021 summaries interprocedurally. */
4024 ipcp_propagate_stage (class ipa_topo_info
*topo
)
4026 struct cgraph_node
*node
;
4029 fprintf (dump_file
, "\n Propagating constants:\n\n");
4031 base_count
= profile_count::uninitialized ();
4033 bool compute_count_base
= false;
4034 unsigned base_count_pos_percent
= 0;
4035 FOR_EACH_DEFINED_FUNCTION (node
)
4037 if (node
->has_gimple_body_p ()
4038 && opt_for_fn (node
->decl
, flag_ipa_cp
)
4039 && opt_for_fn (node
->decl
, optimize
))
4041 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4042 determine_versionability (node
, info
);
4044 unsigned nlattices
= ipa_get_param_count (info
);
4045 info
->lattices
.safe_grow_cleared (nlattices
, true);
4046 initialize_node_lattices (node
);
4048 ipa_size_summary
*s
= ipa_size_summaries
->get (node
);
4049 if (node
->definition
&& !node
->alias
&& s
!= NULL
)
4050 overall_size
+= s
->self_size
;
4051 if (node
->count
.ipa ().initialized_p ())
4053 compute_count_base
= true;
4054 unsigned pos_percent
= opt_for_fn (node
->decl
,
4055 param_ipa_cp_profile_count_base
);
4056 base_count_pos_percent
= MAX (base_count_pos_percent
, pos_percent
);
4060 if (compute_count_base
)
4062 auto_vec
<profile_count
> all_edge_counts
;
4063 all_edge_counts
.reserve_exact (symtab
->edges_count
);
4064 FOR_EACH_DEFINED_FUNCTION (node
)
4065 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4067 profile_count count
= cs
->count
.ipa ();
4068 if (!count
.nonzero_p ())
4071 enum availability avail
;
4073 = cs
->callee
->function_or_virtual_thunk_symbol (&avail
);
4074 ipa_node_params
*info
= ipa_node_params_sum
->get (tgt
);
4075 if (info
&& info
->versionable
)
4076 all_edge_counts
.quick_push (count
);
4079 if (!all_edge_counts
.is_empty ())
4081 gcc_assert (base_count_pos_percent
<= 100);
4082 all_edge_counts
.qsort (compare_edge_profile_counts
);
4084 unsigned base_count_pos
4085 = ((all_edge_counts
.length () * (base_count_pos_percent
)) / 100);
4086 base_count
= all_edge_counts
[base_count_pos
];
4090 fprintf (dump_file
, "\nSelected base_count from %u edges at "
4091 "position %u, arriving at: ", all_edge_counts
.length (),
4093 base_count
.dump (dump_file
);
4094 fprintf (dump_file
, "\n");
4098 fprintf (dump_file
, "\nNo candidates with non-zero call count found, "
4099 "continuing as if without profile feedback.\n");
4102 orig_overall_size
= overall_size
;
4105 fprintf (dump_file
, "\noverall_size: %li\n", overall_size
);
4107 propagate_constants_topo (topo
);
4109 ipcp_verify_propagated_values ();
4110 topo
->constants
.propagate_effects ();
4111 topo
->contexts
.propagate_effects ();
4115 fprintf (dump_file
, "\nIPA lattices after all propagation:\n");
4116 print_all_lattices (dump_file
, (dump_flags
& TDF_DETAILS
), true);
4120 /* Discover newly direct outgoing edges from NODE which is a new clone with
4121 known KNOWN_CSTS and make them direct. */
4124 ipcp_discover_new_direct_edges (struct cgraph_node
*node
,
4125 vec
<tree
> known_csts
,
4126 vec
<ipa_polymorphic_call_context
>
4128 vec
<ipa_argagg_value
, va_gc
> *aggvals
)
4130 struct cgraph_edge
*ie
, *next_ie
;
4133 for (ie
= node
->indirect_calls
; ie
; ie
= next_ie
)
4138 next_ie
= ie
->next_callee
;
4139 ipa_argagg_value_list
avs (aggvals
);
4140 target
= ipa_get_indirect_edge_target_1 (ie
, known_csts
, known_contexts
,
4144 bool agg_contents
= ie
->indirect_info
->agg_contents
;
4145 bool polymorphic
= ie
->indirect_info
->polymorphic
;
4146 int param_index
= ie
->indirect_info
->param_index
;
4147 struct cgraph_edge
*cs
= ipa_make_edge_direct_to_target (ie
, target
,
4151 if (cs
&& !agg_contents
&& !polymorphic
)
4153 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
4154 int c
= ipa_get_controlled_uses (info
, param_index
);
4155 if (c
!= IPA_UNDESCRIBED_USE
4156 && !ipa_get_param_load_dereferenced (info
, param_index
))
4158 struct ipa_ref
*to_del
;
4161 ipa_set_controlled_uses (info
, param_index
, c
);
4162 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4163 fprintf (dump_file
, " controlled uses count of param "
4164 "%i bumped down to %i\n", param_index
, c
);
4166 && (to_del
= node
->find_reference (cs
->callee
, NULL
, 0,
4169 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4170 fprintf (dump_file
, " and even removing its "
4171 "cloning-created reference\n");
4172 to_del
->remove_reference ();
4178 /* Turning calls to direct calls will improve overall summary. */
4180 ipa_update_overall_fn_summary (node
);
4183 class edge_clone_summary
;
4184 static call_summary
<edge_clone_summary
*> *edge_clone_summaries
= NULL
;
4186 /* Edge clone summary. */
4188 class edge_clone_summary
4191 /* Default constructor. */
4192 edge_clone_summary (): prev_clone (NULL
), next_clone (NULL
) {}
4194 /* Default destructor. */
4195 ~edge_clone_summary ()
4198 edge_clone_summaries
->get (prev_clone
)->next_clone
= next_clone
;
4200 edge_clone_summaries
->get (next_clone
)->prev_clone
= prev_clone
;
4203 cgraph_edge
*prev_clone
;
4204 cgraph_edge
*next_clone
;
4207 class edge_clone_summary_t
:
4208 public call_summary
<edge_clone_summary
*>
4211 edge_clone_summary_t (symbol_table
*symtab
):
4212 call_summary
<edge_clone_summary
*> (symtab
)
4214 m_initialize_when_cloning
= true;
4217 void duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4218 edge_clone_summary
*src_data
,
4219 edge_clone_summary
*dst_data
) final override
;
4222 /* Edge duplication hook. */
4225 edge_clone_summary_t::duplicate (cgraph_edge
*src_edge
, cgraph_edge
*dst_edge
,
4226 edge_clone_summary
*src_data
,
4227 edge_clone_summary
*dst_data
)
4229 if (src_data
->next_clone
)
4230 edge_clone_summaries
->get (src_data
->next_clone
)->prev_clone
= dst_edge
;
4231 dst_data
->prev_clone
= src_edge
;
4232 dst_data
->next_clone
= src_data
->next_clone
;
4233 src_data
->next_clone
= dst_edge
;
4236 /* Return true is CS calls DEST or its clone for all contexts. When
4237 ALLOW_RECURSION_TO_CLONE is false, also return false for self-recursive
4238 edges from/to an all-context clone. */
4241 calls_same_node_or_its_all_contexts_clone_p (cgraph_edge
*cs
, cgraph_node
*dest
,
4242 bool allow_recursion_to_clone
)
4244 enum availability availability
;
4245 cgraph_node
*callee
= cs
->callee
->function_symbol (&availability
);
4247 if (availability
<= AVAIL_INTERPOSABLE
)
4251 if (!allow_recursion_to_clone
&& cs
->caller
== callee
)
4254 ipa_node_params
*info
= ipa_node_params_sum
->get (callee
);
4255 return info
->is_all_contexts_clone
&& info
->ipcp_orig_node
== dest
;
4258 /* Return true if edge CS does bring about the value described by SRC to
4259 DEST_VAL of node DEST or its clone for all contexts. */
4262 cgraph_edge_brings_value_p (cgraph_edge
*cs
, ipcp_value_source
<tree
> *src
,
4263 cgraph_node
*dest
, ipcp_value
<tree
> *dest_val
)
4265 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4267 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, !src
->val
)
4268 || caller_info
->node_dead
)
4274 if (caller_info
->ipcp_orig_node
)
4277 if (src
->offset
== -1)
4278 t
= caller_info
->known_csts
[src
->index
];
4279 else if (ipcp_transformation
*ts
4280 = ipcp_get_transformation_summary (cs
->caller
))
4282 ipa_argagg_value_list
avl (ts
);
4283 t
= avl
.get_value (src
->index
, src
->offset
/ BITS_PER_UNIT
);
4285 return (t
!= NULL_TREE
4286 && values_equal_for_ipcp_p (src
->val
->value
, t
));
4290 if (src
->val
== dest_val
)
4293 struct ipcp_agg_lattice
*aglat
;
4294 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4296 if (src
->offset
== -1)
4297 return (plats
->itself
.is_single_const ()
4298 && values_equal_for_ipcp_p (src
->val
->value
,
4299 plats
->itself
.values
->value
));
4302 if (plats
->aggs_bottom
|| plats
->aggs_contain_variable
)
4304 for (aglat
= plats
->aggs
; aglat
; aglat
= aglat
->next
)
4305 if (aglat
->offset
== src
->offset
)
4306 return (aglat
->is_single_const ()
4307 && values_equal_for_ipcp_p (src
->val
->value
,
4308 aglat
->values
->value
));
4314 /* Return true if edge CS does bring about the value described by SRC to
4315 DST_VAL of node DEST or its clone for all contexts. */
4318 cgraph_edge_brings_value_p (cgraph_edge
*cs
,
4319 ipcp_value_source
<ipa_polymorphic_call_context
> *src
,
4321 ipcp_value
<ipa_polymorphic_call_context
> *)
4323 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
4325 if (!calls_same_node_or_its_all_contexts_clone_p (cs
, dest
, true)
4326 || caller_info
->node_dead
)
4331 if (caller_info
->ipcp_orig_node
)
4332 return (caller_info
->known_contexts
.length () > (unsigned) src
->index
)
4333 && values_equal_for_ipcp_p (src
->val
->value
,
4334 caller_info
->known_contexts
[src
->index
]);
4336 class ipcp_param_lattices
*plats
= ipa_get_parm_lattices (caller_info
,
4338 return plats
->ctxlat
.is_single_const ()
4339 && values_equal_for_ipcp_p (src
->val
->value
,
4340 plats
->ctxlat
.values
->value
);
4343 /* Get the next clone in the linked list of clones of an edge. */
4345 static inline struct cgraph_edge
*
4346 get_next_cgraph_edge_clone (struct cgraph_edge
*cs
)
4348 edge_clone_summary
*s
= edge_clone_summaries
->get (cs
);
4349 return s
!= NULL
? s
->next_clone
: NULL
;
4352 /* Given VAL that is intended for DEST, iterate over all its sources and if any
4353 of them is viable and hot, return true. In that case, for those that still
4354 hold, add their edge frequency and their number and cumulative profile
4355 counts of self-ecursive and other edges into *FREQUENCY, *CALLER_COUNT,
4356 REC_COUNT_SUM and NONREC_COUNT_SUM respectively. */
4358 template <typename valtype
>
4360 get_info_about_necessary_edges (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4361 sreal
*freq_sum
, int *caller_count
,
4362 profile_count
*rec_count_sum
,
4363 profile_count
*nonrec_count_sum
)
4365 ipcp_value_source
<valtype
> *src
;
4368 profile_count rec_cnt
= profile_count::zero ();
4369 profile_count nonrec_cnt
= profile_count::zero ();
4371 bool non_self_recursive
= false;
4373 for (src
= val
->sources
; src
; src
= src
->next
)
4375 struct cgraph_edge
*cs
= src
->cs
;
4378 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4381 freq
+= cs
->sreal_frequency ();
4382 hot
|= cs
->maybe_hot_p ();
4383 if (cs
->caller
!= dest
)
4385 non_self_recursive
= true;
4386 if (cs
->count
.ipa ().initialized_p ())
4387 rec_cnt
+= cs
->count
.ipa ();
4389 else if (cs
->count
.ipa ().initialized_p ())
4390 nonrec_cnt
+= cs
->count
.ipa ();
4392 cs
= get_next_cgraph_edge_clone (cs
);
4396 /* If the only edges bringing a value are self-recursive ones, do not bother
4398 if (!non_self_recursive
)
4402 *caller_count
= count
;
4403 *rec_count_sum
= rec_cnt
;
4404 *nonrec_count_sum
= nonrec_cnt
;
4406 if (!hot
&& ipa_node_params_sum
->get (dest
)->node_within_scc
)
4408 struct cgraph_edge
*cs
;
4410 /* Cold non-SCC source edge could trigger hot recursive execution of
4411 function. Consider the case as hot and rely on following cost model
4412 computation to further select right one. */
4413 for (cs
= dest
->callers
; cs
; cs
= cs
->next_caller
)
4414 if (cs
->caller
== dest
&& cs
->maybe_hot_p ())
4421 /* Given a NODE, and a set of its CALLERS, try to adjust order of the callers
4422 to let a non-self-recursive caller be the first element. Thus, we can
4423 simplify intersecting operations on values that arrive from all of these
4424 callers, especially when there exists self-recursive call. Return true if
4425 this kind of adjustment is possible. */
4428 adjust_callers_for_value_intersection (vec
<cgraph_edge
*> &callers
,
4431 for (unsigned i
= 0; i
< callers
.length (); i
++)
4433 cgraph_edge
*cs
= callers
[i
];
4435 if (cs
->caller
!= node
)
4439 callers
[i
] = callers
[0];
4448 /* Return a vector of incoming edges that do bring value VAL to node DEST. It
4449 is assumed their number is known and equal to CALLER_COUNT. */
4451 template <typename valtype
>
4452 static vec
<cgraph_edge
*>
4453 gather_edges_for_value (ipcp_value
<valtype
> *val
, cgraph_node
*dest
,
4456 ipcp_value_source
<valtype
> *src
;
4457 vec
<cgraph_edge
*> ret
;
4459 ret
.create (caller_count
);
4460 for (src
= val
->sources
; src
; src
= src
->next
)
4462 struct cgraph_edge
*cs
= src
->cs
;
4465 if (cgraph_edge_brings_value_p (cs
, src
, dest
, val
))
4466 ret
.quick_push (cs
);
4467 cs
= get_next_cgraph_edge_clone (cs
);
4471 if (caller_count
> 1)
4472 adjust_callers_for_value_intersection (ret
, dest
);
4477 /* Construct a replacement map for a know VALUE for a formal parameter PARAM.
4478 Return it or NULL if for some reason it cannot be created. FORCE_LOAD_REF
4479 should be set to true when the reference created for the constant should be
4480 a load one and not an address one because the corresponding parameter p is
4483 static struct ipa_replace_map
*
4484 get_replacement_map (class ipa_node_params
*info
, tree value
, int parm_num
,
4485 bool force_load_ref
)
4487 struct ipa_replace_map
*replace_map
;
4489 replace_map
= ggc_alloc
<ipa_replace_map
> ();
4492 fprintf (dump_file
, " replacing ");
4493 ipa_dump_param (dump_file
, info
, parm_num
);
4495 fprintf (dump_file
, " with const ");
4496 print_generic_expr (dump_file
, value
);
4499 fprintf (dump_file
, " - forcing load reference\n");
4501 fprintf (dump_file
, "\n");
4503 replace_map
->parm_num
= parm_num
;
4504 replace_map
->new_tree
= value
;
4505 replace_map
->force_load_ref
= force_load_ref
;
4509 /* Dump new profiling counts of NODE. SPEC is true when NODE is a specialzied
4510 one, otherwise it will be referred to as the original node. */
4513 dump_profile_updates (cgraph_node
*node
, bool spec
)
4516 fprintf (dump_file
, " setting count of the specialized node %s to ",
4517 node
->dump_name ());
4519 fprintf (dump_file
, " setting count of the original node %s to ",
4520 node
->dump_name ());
4522 node
->count
.dump (dump_file
);
4523 fprintf (dump_file
, "\n");
4524 for (cgraph_edge
*cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
4526 fprintf (dump_file
, " edge to %s has count ",
4527 cs
->callee
->dump_name ());
4528 cs
->count
.dump (dump_file
);
4529 fprintf (dump_file
, "\n");
4533 /* With partial train run we do not want to assume that original's count is
4534 zero whenever we redurect all executed edges to clone. Simply drop profile
4535 to local one in this case. In eany case, return the new value. ORIG_NODE
4536 is the original node and its count has not been updaed yet. */
4539 lenient_count_portion_handling (profile_count remainder
, cgraph_node
*orig_node
)
4541 if (remainder
.ipa_p () && !remainder
.ipa ().nonzero_p ()
4542 && orig_node
->count
.ipa_p () && orig_node
->count
.ipa ().nonzero_p ()
4543 && opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4544 remainder
= remainder
.guessed_local ();
4549 /* Structure to sum counts coming from nodes other than the original node and
4552 struct gather_other_count_struct
4555 profile_count other_count
;
4558 /* Worker callback of call_for_symbol_thunks_and_aliases summing the number of
4559 counts that come from non-self-recursive calls.. */
4562 gather_count_of_non_rec_edges (cgraph_node
*node
, void *data
)
4564 gather_other_count_struct
*desc
= (gather_other_count_struct
*) data
;
4565 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4566 if (cs
->caller
!= desc
->orig
&& cs
->caller
->clone_of
!= desc
->orig
)
4567 desc
->other_count
+= cs
->count
.ipa ();
4571 /* Structure to help analyze if we need to boost counts of some clones of some
4572 non-recursive edges to match the new callee count. */
4574 struct desc_incoming_count_struct
4577 hash_set
<cgraph_edge
*> *processed_edges
;
4578 profile_count count
;
4579 unsigned unproc_orig_rec_edges
;
4582 /* Go over edges calling NODE and its thunks and gather information about
4583 incoming counts so that we know if we need to make any adjustments. */
4586 analyze_clone_icoming_counts (cgraph_node
*node
,
4587 desc_incoming_count_struct
*desc
)
4589 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4590 if (cs
->caller
->thunk
)
4592 analyze_clone_icoming_counts (cs
->caller
, desc
);
4597 if (cs
->count
.initialized_p ())
4598 desc
->count
+= cs
->count
.ipa ();
4599 if (!desc
->processed_edges
->contains (cs
)
4600 && cs
->caller
->clone_of
== desc
->orig
)
4601 desc
->unproc_orig_rec_edges
++;
4605 /* If caller edge counts of a clone created for a self-recursive arithmetic
4606 jump function must be adjusted because it is coming from a the "seed" clone
4607 for the first value and so has been excessively scaled back as if it was not
4608 a recursive call, adjust it so that the incoming counts of NODE match its
4609 count. NODE is the node or its thunk. */
4612 adjust_clone_incoming_counts (cgraph_node
*node
,
4613 desc_incoming_count_struct
*desc
)
4615 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4616 if (cs
->caller
->thunk
)
4618 adjust_clone_incoming_counts (cs
->caller
, desc
);
4619 profile_count sum
= profile_count::zero ();
4620 for (cgraph_edge
*e
= cs
->caller
->callers
; e
; e
= e
->next_caller
)
4621 if (e
->count
.initialized_p ())
4622 sum
+= e
->count
.ipa ();
4623 cs
->count
= cs
->count
.combine_with_ipa_count (sum
);
4625 else if (!desc
->processed_edges
->contains (cs
)
4626 && cs
->caller
->clone_of
== desc
->orig
)
4628 cs
->count
+= desc
->count
;
4631 fprintf (dump_file
, " Adjusted count of an incoming edge of "
4632 "a clone %s -> %s to ", cs
->caller
->dump_name (),
4633 cs
->callee
->dump_name ());
4634 cs
->count
.dump (dump_file
);
4635 fprintf (dump_file
, "\n");
/* NOTE(review): garbled extraction — many upstream lines are missing (e.g.
   4644, 4649-4651 of the header comment, the loop-index declaration `i` used
   at 4683, the early-return after 4656, several dump_file guards and all
   braces).  Visible logic: sum non-recursive incoming counts per clone,
   split redist_sum evenly among SELF_GEN_CLONES, rescale outgoing edges,
   then fix up clones whose incoming counts no longer match via
   analyze_clone_icoming_counts / adjust_clone_incoming_counts.  Restore from
   upstream before compiling; code text kept verbatim.  */
4640 /* When ORIG_NODE has been cloned for values which have been generated for a
4641 self-recursive call as a result of an arithmetic pass-through
4642 jump-functions, adjust its count together with counts of all such clones in
4643 SELF_GEN_CLONES which also at this point contains ORIG_NODE itself.
4645 The function sums the counts of the original node and all its clones that
4646 cannot be attributed to a specific clone because it comes from a
4647 non-recursive edge. This sum is then evenly divided between the clones and
4648 on top of that each one gets all the counts which can be attributed directly
4652 update_counts_for_self_gen_clones (cgraph_node
*orig_node
,
4653 const vec
<cgraph_node
*> &self_gen_clones
)
4655 profile_count redist_sum
= orig_node
->count
.ipa ();
4656 if (!(redist_sum
> profile_count::zero ()))
4660 fprintf (dump_file
, " Updating profile of self recursive clone "
4663 gather_other_count_struct gocs
;
4664 gocs
.orig
= orig_node
;
4665 gocs
.other_count
= profile_count::zero ();
4667 auto_vec
<profile_count
, 8> other_edges_count
;
4668 for (cgraph_node
*n
: self_gen_clones
)
4670 gocs
.other_count
= profile_count::zero ();
4671 n
->call_for_symbol_thunks_and_aliases (gather_count_of_non_rec_edges
,
4673 other_edges_count
.safe_push (gocs
.other_count
);
4674 redist_sum
-= gocs
.other_count
;
4677 hash_set
<cgraph_edge
*> processed_edges
;
4679 for (cgraph_node
*n
: self_gen_clones
)
4681 profile_count orig_count
= n
->count
;
4682 profile_count new_count
4683 = (redist_sum
/ self_gen_clones
.length () + other_edges_count
[i
]);
4684 new_count
= lenient_count_portion_handling (new_count
, orig_node
);
4685 n
->count
= new_count
;
4686 profile_count::adjust_for_ipa_scaling (&new_count
, &orig_count
);
4687 for (cgraph_edge
*cs
= n
->callees
; cs
; cs
= cs
->next_callee
)
4689 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4690 processed_edges
.add (cs
);
4692 for (cgraph_edge
*cs
= n
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4693 cs
->count
= cs
->count
.apply_scale (new_count
, orig_count
);
4698 /* There are still going to be edges to ORIG_NODE that have one or more
4699 clones coming from another node clone in SELF_GEN_CLONES and which we
4700 scaled by the same amount, which means that the total incoming sum of
4701 counts to ORIG_NODE will be too high, scale such edges back. */
4702 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4704 if (cs
->callee
->ultimate_alias_target () == orig_node
)
4707 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4708 if (e
->callee
->ultimate_alias_target () == orig_node
4709 && processed_edges
.contains (e
))
4712 for (cgraph_edge
*e
= cs
; e
; e
= get_next_cgraph_edge_clone (e
))
4713 if (e
->callee
->ultimate_alias_target () == orig_node
4714 && processed_edges
.contains (e
))
4719 /* Edges from the seeds of the values generated for arithmetic jump-functions
4720 along self-recursive edges are likely to have fairly low count and so
4721 edges from them to nodes in the self_gen_clones do not correspond to the
4722 artificially distributed count of the nodes, the total sum of incoming
4723 edges to some clones might be too low. Detect this situation and correct
4725 for (cgraph_node
*n
: self_gen_clones
)
4727 if (!(n
->count
.ipa () > profile_count::zero ()))
4730 desc_incoming_count_struct desc
;
4731 desc
.orig
= orig_node
;
4732 desc
.processed_edges
= &processed_edges
;
4733 desc
.count
= profile_count::zero ();
4734 desc
.unproc_orig_rec_edges
= 0;
4735 analyze_clone_icoming_counts (n
, &desc
);
4737 if (n
->count
.differs_from_p (desc
.count
))
4739 if (n
->count
> desc
.count
4740 && desc
.unproc_orig_rec_edges
> 0)
4742 desc
.count
= n
->count
- desc
.count
;
4743 desc
.count
= desc
.count
/= desc
.unproc_orig_rec_edges
;
4744 adjust_clone_incoming_counts (n
, &desc
);
4748 " Unable to fix up incoming counts for %s.\n",
4754 for (cgraph_node
*n
: self_gen_clones
)
4755 dump_profile_updates (n
, n
!= orig_node
);
/* NOTE(review): garbled extraction — numerous upstream lines missing (the
   `static void` return-type line, early return after 4773, the declaration
   of `zero` used at 4828, loop bodies at 4832-4838, the plain else-branch
   remainder computation arguments at 4881-4885, dump_file guards, braces).
   Visible logic: split ORIG_NODE's IPA count between ORIG_NODE and its new
   specialized clone NEW_NODE based on gathered caller statistics, with
   special handling for runaway profiles (global0) and self-recursive edges,
   then rescale both nodes' outgoing edge counts.  Restore from upstream
   before compiling; code text kept verbatim.  */
4759 /* After a specialized NEW_NODE version of ORIG_NODE has been created, update
4760 their profile information to reflect this. This function should not be used
4761 for clones generated for arithmetic pass-through jump functions on a
4762 self-recursive call graph edge, that situation is handled by
4763 update_counts_for_self_gen_clones. */
4766 update_profiling_info (struct cgraph_node
*orig_node
,
4767 struct cgraph_node
*new_node
)
4769 struct caller_statistics stats
;
4770 profile_count new_sum
;
4771 profile_count remainder
, orig_node_count
= orig_node
->count
.ipa ();
4773 if (!(orig_node_count
> profile_count::zero ()))
4778 fprintf (dump_file
, " Updating profile from original count: ");
4779 orig_node_count
.dump (dump_file
);
4780 fprintf (dump_file
, "\n");
4783 init_caller_stats (&stats
, new_node
);
4784 new_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
, &stats
,
4786 new_sum
= stats
.count_sum
;
4788 bool orig_edges_processed
= false;
4789 if (new_sum
> orig_node_count
)
4791 /* TODO: Profile has already gone astray, keep what we have but lower it
4792 to global0 category. */
4793 remainder
= orig_node
->count
.global0 ();
4795 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4796 cs
->count
= cs
->count
.global0 ();
4797 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
4799 cs
= cs
->next_callee
)
4800 cs
->count
= cs
->count
.global0 ();
4801 orig_edges_processed
= true;
4803 else if (stats
.rec_count_sum
.nonzero_p ())
4805 int new_nonrec_calls
= stats
.n_nonrec_calls
;
4806 /* There are self-recursive edges which are likely to bring in the
4807 majority of calls but which we must divide in between the original and
4809 init_caller_stats (&stats
, orig_node
);
4810 orig_node
->call_for_symbol_thunks_and_aliases (gather_caller_stats
,
4812 int orig_nonrec_calls
= stats
.n_nonrec_calls
;
4813 profile_count orig_nonrec_call_count
= stats
.count_sum
;
4815 if (orig_node
->local
)
4817 if (!orig_nonrec_call_count
.nonzero_p ())
4820 fprintf (dump_file
, " The original is local and the only "
4821 "incoming edges from non-dead callers with nonzero "
4822 "counts are self-recursive, assuming it is cold.\n");
4823 /* The NEW_NODE count and counts of all its outgoing edges
4824 are still unmodified copies of ORIG_NODE's. Just clear
4825 the latter and bail out. */
4827 if (opt_for_fn (orig_node
->decl
, flag_profile_partial_training
))
4828 zero
= profile_count::zero ().guessed_local ();
4830 zero
= profile_count::adjusted_zero ();
4831 orig_node
->count
= zero
;
4832 for (cgraph_edge
*cs
= orig_node
->callees
;
4834 cs
= cs
->next_callee
)
4836 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
4838 cs
= cs
->next_callee
)
4845 /* Let's behave as if there was another caller that accounts for all
4846 the calls that were either indirect or from other compilation
4848 orig_nonrec_calls
++;
4849 profile_count pretend_caller_count
4850 = (orig_node_count
- new_sum
- orig_nonrec_call_count
4851 - stats
.rec_count_sum
);
4852 orig_nonrec_call_count
+= pretend_caller_count
;
4855 /* Divide all "unexplained" counts roughly proportionally to sums of
4856 counts of non-recursive calls.
4858 We put rather arbitrary limits on how many counts we claim because the
4859 number of non-self-recursive incoming count is only a rough guideline
4860 and there are cases (such as mcf) where using it blindly just takes
4861 too many. And if lattices are considered in the opposite order we
4862 could also take too few. */
4863 profile_count unexp
= orig_node_count
- new_sum
- orig_nonrec_call_count
;
4865 int limit_den
= 2 * (orig_nonrec_calls
+ new_nonrec_calls
);
4866 profile_count new_part
4867 = MAX(MIN (unexp
.apply_scale (new_sum
,
4868 new_sum
+ orig_nonrec_call_count
),
4869 unexp
.apply_scale (limit_den
- 1, limit_den
)),
4870 unexp
.apply_scale (new_nonrec_calls
, limit_den
));
4873 fprintf (dump_file
, " Claiming ");
4874 new_part
.dump (dump_file
);
4875 fprintf (dump_file
, " of unexplained ");
4876 unexp
.dump (dump_file
);
4877 fprintf (dump_file
, " counts because of self-recursive "
4880 new_sum
+= new_part
;
4881 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
4885 remainder
= lenient_count_portion_handling (orig_node_count
- new_sum
,
4888 new_sum
= orig_node_count
.combine_with_ipa_count (new_sum
);
4889 new_node
->count
= new_sum
;
4890 orig_node
->count
= remainder
;
4892 profile_count orig_new_node_count
= orig_node_count
;
4893 profile_count::adjust_for_ipa_scaling (&new_sum
, &orig_new_node_count
);
4894 for (cgraph_edge
*cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
4895 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
4896 for (cgraph_edge
*cs
= new_node
->indirect_calls
; cs
; cs
= cs
->next_callee
)
4897 cs
->count
= cs
->count
.apply_scale (new_sum
, orig_new_node_count
);
4899 if (!orig_edges_processed
)
4901 profile_count::adjust_for_ipa_scaling (&remainder
, &orig_node_count
);
4902 for (cgraph_edge
*cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4903 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
4904 for (cgraph_edge
*cs
= orig_node
->indirect_calls
;
4906 cs
= cs
->next_callee
)
4907 cs
->count
= cs
->count
.apply_scale (remainder
, orig_node_count
);
4912 dump_profile_updates (new_node
, true);
4913 dump_profile_updates (orig_node
, false);
/* NOTE(review): garbled extraction — missing upstream lines include the
   return type, dump_file guards, the early return after 4937, the left-hand
   side of the assignment at 4943 (presumably `orig_node->count =`), and the
   body using `dec` after 4951.  Visible logic: add REDIRECTED_SUM to
   NEW_NODE's count, shave it off ORIG_NODE's, and proportionally adjust both
   nodes' outgoing edge counts.  Restore from upstream before compiling;
   code text kept verbatim.  */
4917 /* Update the respective profile of specialized NEW_NODE and the original
4918 ORIG_NODE after additional edges with cumulative count sum REDIRECTED_SUM
4919 have been redirected to the specialized version. */
4922 update_specialized_profile (struct cgraph_node
*new_node
,
4923 struct cgraph_node
*orig_node
,
4924 profile_count redirected_sum
)
4926 struct cgraph_edge
*cs
;
4927 profile_count new_node_count
, orig_node_count
= orig_node
->count
.ipa ();
4931 fprintf (dump_file
, " the sum of counts of redirected edges is ");
4932 redirected_sum
.dump (dump_file
);
4933 fprintf (dump_file
, "\n old ipa count of the original node is ");
4934 orig_node_count
.dump (dump_file
);
4935 fprintf (dump_file
, "\n");
4937 if (!(orig_node_count
> profile_count::zero ()))
4940 new_node_count
= new_node
->count
;
4941 new_node
->count
+= redirected_sum
;
4943 = lenient_count_portion_handling (orig_node
->count
- redirected_sum
,
4946 for (cs
= new_node
->callees
; cs
; cs
= cs
->next_callee
)
4947 cs
->count
+= cs
->count
.apply_scale (redirected_sum
, new_node_count
);
4949 for (cs
= orig_node
->callees
; cs
; cs
= cs
->next_callee
)
4951 profile_count dec
= cs
->count
.apply_scale (redirected_sum
,
4958 dump_profile_updates (new_node
, true);
4959 dump_profile_updates (orig_node
, false);
/* NOTE(review): garbled extraction — the struct body is truncated: the
   member read as `pack->index` later in this file (upstream line 4973,
   presumably `int index;`) and the closing brace are missing.  Restore from
   upstream before compiling; code text kept verbatim.  */
4963 static void adjust_references_in_caller (cgraph_edge
*cs
,
4964 symtab_node
*symbol
, int index
);
4966 /* Simple structure to pass a symbol and index (with same meaning as parameters
4967 of adjust_references_in_caller) through a void* parameter of a
4968 call_for_symbol_thunks_and_aliases callback. */
4969 struct symbol_and_index_together
4971 symtab_node
*symbol
;
/* NOTE(review): garbled extraction — the return-type line (callbacks of
   call_for_symbol_thunks_and_aliases return bool), the closing return and
   braces are missing.  Visible logic: unpack the symbol/index pair from the
   void* DATA and call adjust_references_in_caller on every non-thunk caller
   edge of NODE.  Restore from upstream before compiling; text verbatim.  */
4975 /* Worker callback of call_for_symbol_thunks_and_aliases to recursively call
4976 adjust_references_in_caller on edges up in the call-graph, if necessary. */
4978 adjust_refs_in_act_callers (struct cgraph_node
*node
, void *data
)
4980 symbol_and_index_together
*pack
= (symbol_and_index_together
*) data
;
4981 for (cgraph_edge
*cs
= node
->callers
; cs
; cs
= cs
->next_caller
)
4982 if (!cs
->caller
->thunk
)
4983 adjust_references_in_caller (cs
, pack
->symbol
, pack
->index
);
/* NOTE(review): garbled extraction — missing upstream lines include the
   return type, the declaration of `cst` (used at 5023/5028), early returns,
   dump_file guards, several else branches and all braces.  Visible logic:
   for an ADDR_EXPR argument at INDEX of edge CS, drop the caller's IPA_REF_ADDR
   reference to SYMBOL when the jump function is a constant or a simple
   pass-through whose controlled-uses count can be decremented, possibly
   replacing it with a LOAD reference, and recurse into further callers via
   adjust_refs_in_act_callers.  Restore from upstream; text verbatim.  */
4987 /* At INDEX of a function being called by CS there is an ADDR_EXPR of a
4988 variable which is only dereferenced and which is represented by SYMBOL. See
4989 if we can remove ADDR reference in callers associated with the call. */
4992 adjust_references_in_caller (cgraph_edge
*cs
, symtab_node
*symbol
, int index
)
4994 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
4995 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, index
);
4996 if (jfunc
->type
== IPA_JF_CONST
)
4998 ipa_ref
*to_del
= cs
->caller
->find_reference (symbol
, cs
->call_stmt
,
5003 to_del
->remove_reference ();
5004 ipa_zap_jf_refdesc (jfunc
);
5006 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
5007 cs
->caller
->dump_name (), symbol
->dump_name ());
5011 if (jfunc
->type
!= IPA_JF_PASS_THROUGH
5012 || ipa_get_jf_pass_through_operation (jfunc
) != NOP_EXPR
5013 || ipa_get_jf_pass_through_refdesc_decremented (jfunc
))
5016 int fidx
= ipa_get_jf_pass_through_formal_id (jfunc
);
5017 cgraph_node
*caller
= cs
->caller
;
5018 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (caller
);
5019 /* TODO: This consistency check may be too big and not really
5020 that useful. Consider removing it. */
5022 if (caller_info
->ipcp_orig_node
)
5023 cst
= caller_info
->known_csts
[fidx
];
5026 ipcp_lattice
<tree
> *lat
= ipa_get_scalar_lat (caller_info
, fidx
);
5027 gcc_assert (lat
->is_single_const ());
5028 cst
= lat
->values
->value
;
5030 gcc_assert (TREE_CODE (cst
) == ADDR_EXPR
5031 && (symtab_node::get (get_base_address (TREE_OPERAND (cst
, 0)))
5034 int cuses
= ipa_get_controlled_uses (caller_info
, fidx
);
5035 if (cuses
== IPA_UNDESCRIBED_USE
)
5037 gcc_assert (cuses
> 0);
5039 ipa_set_controlled_uses (caller_info
, fidx
, cuses
);
5040 ipa_set_jf_pass_through_refdesc_decremented (jfunc
, true);
5041 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5042 fprintf (dump_file
, " Controlled uses of parameter %i of %s dropped "
5043 "to %i.\n", fidx
, caller
->dump_name (), cuses
);
5047 if (caller_info
->ipcp_orig_node
)
5049 /* Cloning machinery has created a reference here, we need to either
5050 remove it or change it to a read one. */
5051 ipa_ref
*to_del
= caller
->find_reference (symbol
, NULL
, 0, IPA_REF_ADDR
);
5054 to_del
->remove_reference ();
5056 fprintf (dump_file
, " Removed a reference from %s to %s.\n",
5057 cs
->caller
->dump_name (), symbol
->dump_name ());
5058 if (ipa_get_param_load_dereferenced (caller_info
, fidx
))
5060 caller
->create_reference (symbol
, IPA_REF_LOAD
, NULL
);
5063 " ...and replaced it with LOAD one.\n");
5068 symbol_and_index_together pack
;
5069 pack
.symbol
= symbol
;
5071 if (caller
->can_change_signature
)
5072 caller
->call_for_symbol_thunks_and_aliases (adjust_refs_in_act_callers
,
/* NOTE(review): garbled extraction — missing upstream lines include the
   `static bool` return type, the loop body's returns (5089-5094), the
   assignment of `filled_vec` and the final return.  Visible logic: report
   whether any parameter of NODE is both unknown in KNOWN_CSTS and used, and
   still surviving according to prior clone param_adjustments.  The test at
   5101 reads `surviving.length() < (unsigned) i` where upstream has `>`;
   cannot be confirmed from this fragment — verify against upstream.
   Restore from upstream before compiling; text verbatim.  */
5077 /* Return true if we would like to remove a parameter from NODE when cloning it
5078 with KNOWN_CSTS scalar constants. */
5081 want_remove_some_param_p (cgraph_node
*node
, vec
<tree
> known_csts
)
5083 auto_vec
<bool, 16> surviving
;
5084 bool filled_vec
= false;
5085 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5086 int i
, count
= ipa_get_param_count (info
);
5088 for (i
= 0; i
< count
; i
++)
5090 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5095 clone_info
*info
= clone_info::get (node
);
5096 if (!info
|| !info
->param_adjustments
)
5098 info
->param_adjustments
->get_surviving_params (&surviving
);
5101 if (surviving
.length() < (unsigned) i
&& surviving
[i
])
/* NOTE(review): garbled extraction — missing upstream lines include trailing
   arguments of several calls (ipa_param_adjustments ctor, create_virtual_clone,
   ipcp_discover_new_direct_edges), dump_file guards, `continue`/`break`
   statements, the final `return new_node;` and all braces.  Visible logic:
   build new param-adjustments (either merged with previous clone adjustments
   or fresh when parameters can be dropped), separate out self-recursive
   caller edges, build replacement maps for known constants (dropping ADDR
   references where possible), create the virtual clone, redirect cloned
   self-recursive edges to it, attach aggregate values / known contexts, and
   record IPA-CP bookkeeping on the new node's summaries.  Restore from
   upstream before compiling; code text kept verbatim.  */
5107 /* Create a specialized version of NODE with known constants in KNOWN_CSTS,
5108 known contexts in KNOWN_CONTEXTS and known aggregate values in AGGVALS and
5109 redirect all edges in CALLERS to it. */
5111 static struct cgraph_node
*
5112 create_specialized_node (struct cgraph_node
*node
,
5113 vec
<tree
> known_csts
,
5114 vec
<ipa_polymorphic_call_context
> known_contexts
,
5115 vec
<ipa_argagg_value
, va_gc
> *aggvals
,
5116 vec
<cgraph_edge
*> &callers
)
5118 ipa_node_params
*new_info
, *info
= ipa_node_params_sum
->get (node
);
5119 vec
<ipa_replace_map
*, va_gc
> *replace_trees
= NULL
;
5120 vec
<ipa_adjusted_param
, va_gc
> *new_params
= NULL
;
5121 struct cgraph_node
*new_node
;
5122 int i
, count
= ipa_get_param_count (info
);
5123 clone_info
*cinfo
= clone_info::get (node
);
5124 ipa_param_adjustments
*old_adjustments
= cinfo
5125 ? cinfo
->param_adjustments
: NULL
;
5126 ipa_param_adjustments
*new_adjustments
;
5127 gcc_assert (!info
->ipcp_orig_node
);
5128 gcc_assert (node
->can_change_signature
5129 || !old_adjustments
);
5131 if (old_adjustments
)
5133 /* At the moment all IPA optimizations should use the number of
5134 parameters of the prevailing decl as the m_always_copy_start.
5135 Handling any other value would complicate the code below, so for the
5136 time being let's only assert it is so. */
5137 gcc_assert (old_adjustments
->m_always_copy_start
== count
5138 || old_adjustments
->m_always_copy_start
< 0);
5139 int old_adj_count
= vec_safe_length (old_adjustments
->m_adj_params
);
5140 for (i
= 0; i
< old_adj_count
; i
++)
5142 ipa_adjusted_param
*old_adj
= &(*old_adjustments
->m_adj_params
)[i
];
5143 if (!node
->can_change_signature
5144 || old_adj
->op
!= IPA_PARAM_OP_COPY
5145 || (!known_csts
[old_adj
->base_index
]
5146 && ipa_is_param_used (info
, old_adj
->base_index
)))
5148 ipa_adjusted_param new_adj
= *old_adj
;
5150 new_adj
.prev_clone_adjustment
= true;
5151 new_adj
.prev_clone_index
= i
;
5152 vec_safe_push (new_params
, new_adj
);
5155 bool skip_return
= old_adjustments
->m_skip_return
;
5156 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5157 ipa_param_adjustments (new_params
, count
,
5160 else if (node
->can_change_signature
5161 && want_remove_some_param_p (node
, known_csts
))
5163 ipa_adjusted_param adj
;
5164 memset (&adj
, 0, sizeof (adj
));
5165 adj
.op
= IPA_PARAM_OP_COPY
;
5166 for (i
= 0; i
< count
; i
++)
5167 if (!known_csts
[i
] && ipa_is_param_used (info
, i
))
5170 adj
.prev_clone_index
= i
;
5171 vec_safe_push (new_params
, adj
);
5173 new_adjustments
= (new (ggc_alloc
<ipa_param_adjustments
> ())
5174 ipa_param_adjustments (new_params
, count
, false));
5177 new_adjustments
= NULL
;
5179 auto_vec
<cgraph_edge
*, 2> self_recursive_calls
;
5180 for (i
= callers
.length () - 1; i
>= 0; i
--)
5182 cgraph_edge
*cs
= callers
[i
];
5183 if (cs
->caller
== node
)
5185 self_recursive_calls
.safe_push (cs
);
5186 callers
.unordered_remove (i
);
5189 replace_trees
= cinfo
? vec_safe_copy (cinfo
->tree_map
) : NULL
;
5190 for (i
= 0; i
< count
; i
++)
5192 tree t
= known_csts
[i
];
5196 gcc_checking_assert (TREE_CODE (t
) != TREE_BINFO
);
5198 bool load_ref
= false;
5199 symtab_node
*ref_symbol
;
5200 if (TREE_CODE (t
) == ADDR_EXPR
)
5202 tree base
= get_base_address (TREE_OPERAND (t
, 0));
5203 if (TREE_CODE (base
) == VAR_DECL
5204 && ipa_get_controlled_uses (info
, i
) == 0
5205 && ipa_get_param_load_dereferenced (info
, i
)
5206 && (ref_symbol
= symtab_node::get (base
)))
5209 if (node
->can_change_signature
)
5210 for (cgraph_edge
*caller
: callers
)
5211 adjust_references_in_caller (caller
, ref_symbol
, i
);
5215 ipa_replace_map
*replace_map
= get_replacement_map (info
, t
, i
, load_ref
);
5217 vec_safe_push (replace_trees
, replace_map
);
5220 unsigned &suffix_counter
= clone_num_suffixes
->get_or_insert (
5221 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (
5223 new_node
= node
->create_virtual_clone (callers
, replace_trees
,
5224 new_adjustments
, "constprop",
5228 bool have_self_recursive_calls
= !self_recursive_calls
.is_empty ();
5229 for (unsigned j
= 0; j
< self_recursive_calls
.length (); j
++)
5231 cgraph_edge
*cs
= get_next_cgraph_edge_clone (self_recursive_calls
[j
]);
5232 /* Cloned edges can disappear during cloning as speculation can be
5233 resolved, check that we have one and that it comes from the last
5235 if (cs
&& cs
->caller
== new_node
)
5236 cs
->redirect_callee_duplicating_thunks (new_node
);
5237 /* Any future code that would make more than one clone of an outgoing
5238 edge would confuse this mechanism, so let's check that does not
5240 gcc_checking_assert (!cs
5241 || !get_next_cgraph_edge_clone (cs
)
5242 || get_next_cgraph_edge_clone (cs
)->caller
!= new_node
);
5244 if (have_self_recursive_calls
)
5245 new_node
->expand_all_artificial_thunks ();
5247 ipa_set_node_agg_value_chain (new_node
, aggvals
);
5248 for (const ipa_argagg_value
&av
: aggvals
)
5249 new_node
->maybe_create_reference (av
.value
, NULL
);
5251 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5253 fprintf (dump_file
, " the new node is %s.\n", new_node
->dump_name ());
5254 if (known_contexts
.exists ())
5256 for (i
= 0; i
< count
; i
++)
5257 if (!known_contexts
[i
].useless_p ())
5259 fprintf (dump_file
, " known ctx %i is ", i
);
5260 known_contexts
[i
].dump (dump_file
);
5265 fprintf (dump_file
, " Aggregate replacements:");
5266 ipa_argagg_value_list
avs (aggvals
);
5267 avs
.dump (dump_file
);
5271 new_info
= ipa_node_params_sum
->get (new_node
);
5272 new_info
->ipcp_orig_node
= node
;
5273 new_node
->ipcp_clone
= true;
5274 new_info
->known_csts
= known_csts
;
5275 new_info
->known_contexts
= known_contexts
;
5277 ipcp_discover_new_direct_edges (new_node
, known_csts
, known_contexts
,
/* NOTE(review): garbled extraction — missing the `static bool` line, the
   `bool simple = true` trailing parameter (referenced at 5296), the return
   statements and braces.  Visible logic: true when CS is a self-recursive
   call with a (possibly simple NOP) pass-through of parameter I and the
   caller is not itself an IPA-CP clone.  Restore from upstream before
   compiling; text verbatim.  */
5283 /* Return true if JFUNC, which describes an i-th parameter of call CS, is a
5284 pass-through function to itself when the cgraph_node involved is not an
5285 IPA-CP clone. When SIMPLE is true, further check if JFUNC is a simple
5286 no-operation pass-through. */
5289 self_recursive_pass_through_p (cgraph_edge
*cs
, ipa_jump_func
*jfunc
, int i
,
5292 enum availability availability
;
5293 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5294 && availability
> AVAIL_INTERPOSABLE
5295 && jfunc
->type
== IPA_JF_PASS_THROUGH
5296 && (!simple
|| ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5297 && ipa_get_jf_pass_through_formal_id (jfunc
) == i
5298 && ipa_node_params_sum
->get (cs
->caller
)
5299 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
/* NOTE(review): garbled extraction — missing the end of the header comment
   (upstream 5308-5310), the `static bool` line, returns and braces.
   Aggregate analogue of self_recursive_pass_through_p: checks a LOAD_AGG
   jump function loading from the same offset of the same parameter I on a
   self-recursive, non-interposable edge whose caller is not an IPA-CP
   clone.  Restore from upstream before compiling; text verbatim.  */
5304 /* Return true if JFUNC, which describes a part of an aggregate represented or
5305 pointed to by the i-th parameter of call CS, is a pass-through function to
5306 itself when the cgraph_node involved is not an IPA-CP clone. When
5307 SIMPLE is true, further check if JFUNC is a simple no-operation
5311 self_recursive_agg_pass_through_p (const cgraph_edge
*cs
,
5312 const ipa_agg_jf_item
*jfunc
,
5313 int i
, bool simple
= true)
5315 enum availability availability
;
5316 if (cs
->caller
== cs
->callee
->function_symbol (&availability
)
5317 && availability
> AVAIL_INTERPOSABLE
5318 && jfunc
->jftype
== IPA_JF_LOAD_AGG
5319 && jfunc
->offset
== jfunc
->value
.load_agg
.offset
5320 && (!simple
|| jfunc
->value
.pass_through
.operation
== NOP_EXPR
)
5321 && jfunc
->value
.pass_through
.formal_id
== i
5322 && useless_type_conversion_p (jfunc
->value
.load_agg
.type
, jfunc
->type
)
5323 && ipa_node_params_sum
->get (cs
->caller
)
5324 && !ipa_node_params_sum
->get (cs
->caller
)->ipcp_orig_node
)
/* NOTE(review): garbled extraction — missing lines include the declarations
   of `j`, `t` and `first` (used at 5351/5381/5393), the worked example in
   the comment (5370-5377), `newval = t;` style updates, `break` statements
   and braces.  Visible logic: for each parameter with no known constant and
   a non-bottom lattice, intersect the values arriving from every caller in
   CALLERS (with special handling of self-recursive arithmetic pass-throughs)
   and record a newly agreed constant into KNOWN_CSTS.  Restore from
   upstream before compiling; text verbatim.  */
5329 /* Given a NODE, and a subset of its CALLERS, try to populate blank slots in
5330 KNOWN_CSTS with constants that are also known for all of the CALLERS. */
5333 find_more_scalar_values_for_callers_subset (struct cgraph_node
*node
,
5334 vec
<tree
> &known_csts
,
5335 const vec
<cgraph_edge
*> &callers
)
5337 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5338 int i
, count
= ipa_get_param_count (info
);
5340 for (i
= 0; i
< count
; i
++)
5342 struct cgraph_edge
*cs
;
5343 tree newval
= NULL_TREE
;
5346 tree type
= ipa_get_type (info
, i
);
5348 if (ipa_get_scalar_lat (info
, i
)->bottom
|| known_csts
[i
])
5351 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5353 struct ipa_jump_func
*jump_func
;
5356 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5358 || i
>= ipa_get_cs_argument_count (args
)
5360 && call_passes_through_thunk (cs
)))
5365 jump_func
= ipa_get_ith_jump_func (args
, i
);
5367 /* Besides simple pass-through jump function, arithmetic jump
5368 function could also introduce argument-direct-pass-through for
5369 self-feeding recursive call. For example,
5376 Given that i is 0, recursive propagation via (i & 1) also gets
5378 if (self_recursive_pass_through_p (cs
, jump_func
, i
, false))
5380 gcc_assert (newval
);
5381 t
= ipa_get_jf_arith_result (
5382 ipa_get_jf_pass_through_operation (jump_func
),
5384 ipa_get_jf_pass_through_operand (jump_func
),
5388 t
= ipa_value_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5392 && !values_equal_for_ipcp_p (t
, newval
))
5393 || (!first
&& !newval
))
5405 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5407 fprintf (dump_file
, " adding an extra known scalar value ");
5408 print_ipcp_constant_value (dump_file
, newval
);
5409 fprintf (dump_file
, " for ");
5410 ipa_dump_param (dump_file
, info
, i
);
5411 fprintf (dump_file
, "\n");
5414 known_csts
[i
] = newval
;
/* NOTE(review): garbled extraction — missing lines include the parameter
   name of the known_contexts vector (dereferenced as `known_contexts->` in
   the body, so it is a pointer parameter), declarations of `j`/`cs`/`first`,
   `continue`/`break` statements, trailing call arguments and braces.
   Polymorphic-context analogue of
   find_more_scalar_values_for_callers_subset: meet the contexts arriving
   from all CALLERS and store a useful result into *KNOWN_CONTEXTS.
   Restore from upstream before compiling; text verbatim.  */
5419 /* Given a NODE and a subset of its CALLERS, try to populate blank slots in
5420 KNOWN_CONTEXTS with polymorphic contexts that are also known for all of the
5424 find_more_contexts_for_caller_subset (cgraph_node
*node
,
5425 vec
<ipa_polymorphic_call_context
>
5427 const vec
<cgraph_edge
*> &callers
)
5429 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
5430 int i
, count
= ipa_get_param_count (info
);
5432 for (i
= 0; i
< count
; i
++)
5436 if (ipa_get_poly_ctx_lat (info
, i
)->bottom
5437 || (known_contexts
->exists ()
5438 && !(*known_contexts
)[i
].useless_p ()))
5441 ipa_polymorphic_call_context newval
;
5445 FOR_EACH_VEC_ELT (callers
, j
, cs
)
5447 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5449 || i
>= ipa_get_cs_argument_count (args
))
5451 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (args
, i
);
5452 ipa_polymorphic_call_context ctx
;
5453 ctx
= ipa_context_from_jfunc (ipa_node_params_sum
->get (cs
->caller
),
5461 newval
.meet_with (ctx
);
5462 if (newval
.useless_p ())
5466 if (!newval
.useless_p ())
5468 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5470 fprintf (dump_file
, " adding an extra known polymorphic "
5472 print_ipcp_constant_value (dump_file
, newval
);
5473 fprintf (dump_file
, " for ");
5474 ipa_dump_param (dump_file
, info
, i
);
5475 fprintf (dump_file
, "\n");
5478 if (!known_contexts
->exists ())
5479 known_contexts
->safe_grow_cleared (ipa_get_param_count (info
),
5481 (*known_contexts
)[i
] = newval
;
/* NOTE(review): garbled extraction — missing lines include the `static void`
   line, the declaration of `src_idx` (assigned at 5512/5520), early
   `return`s, trailing call arguments, the `iav.index`/`iav.value`
   assignments around 5599-5605, the worked example in the comment
   (5578-5584) and braces.  Visible logic: translate the jump function of
   parameter INDEX on edge CS into ipa_argagg_value entries pushed to RES —
   either forwarded from the caller's own aggregate values/lattices (for
   preserved pass-through / ancestor jfuncs) or computed from the jfunc's own
   agg items, consulting INTERIM for self-recursive arithmetic items.
   Restore from upstream before compiling; text verbatim.  */
5487 /* Push all aggregate values coming along edge CS for parameter number INDEX to
5488 RES. If INTERIM is non-NULL, it contains the current interim state of
5489 collected aggregate values which can be used to compute values passed over
5490 self-recursive edges.
5492 This is basically one iteration of push_agg_values_from_edge over one
5493 parameter, which allows for simpler early returns. */
5496 push_agg_values_for_index_from_edge (struct cgraph_edge
*cs
, int index
,
5497 vec
<ipa_argagg_value
> *res
,
5498 const ipa_argagg_value_list
*interim
)
5500 bool agg_values_from_caller
= false;
5501 bool agg_jf_preserved
= false;
5502 unsigned unit_delta
= UINT_MAX
;
5504 ipa_jump_func
*jfunc
= ipa_get_ith_jump_func (ipa_edge_args_sum
->get (cs
),
5507 if (jfunc
->type
== IPA_JF_PASS_THROUGH
5508 && ipa_get_jf_pass_through_operation (jfunc
) == NOP_EXPR
)
5510 agg_values_from_caller
= true;
5511 agg_jf_preserved
= ipa_get_jf_pass_through_agg_preserved (jfunc
);
5512 src_idx
= ipa_get_jf_pass_through_formal_id (jfunc
);
5515 else if (jfunc
->type
== IPA_JF_ANCESTOR
5516 && ipa_get_jf_ancestor_agg_preserved (jfunc
))
5518 agg_values_from_caller
= true;
5519 agg_jf_preserved
= true;
5520 src_idx
= ipa_get_jf_ancestor_formal_id (jfunc
);
5521 unit_delta
= ipa_get_jf_ancestor_offset (jfunc
) / BITS_PER_UNIT
;
5524 ipa_node_params
*caller_info
= ipa_node_params_sum
->get (cs
->caller
);
5525 if (agg_values_from_caller
)
5527 if (caller_info
->ipcp_orig_node
)
5529 struct cgraph_node
*orig_node
= caller_info
->ipcp_orig_node
;
5530 ipcp_transformation
*ts
5531 = ipcp_get_transformation_summary (cs
->caller
);
5532 ipa_node_params
*orig_info
= ipa_node_params_sum
->get (orig_node
);
5533 ipcp_param_lattices
*orig_plats
5534 = ipa_get_parm_lattices (orig_info
, src_idx
);
5537 && (agg_jf_preserved
|| !orig_plats
->aggs_by_ref
))
5539 ipa_argagg_value_list
src (ts
);
5540 src
.push_adjusted_values (src_idx
, index
, unit_delta
, res
);
5546 ipcp_param_lattices
*src_plats
5547 = ipa_get_parm_lattices (caller_info
, src_idx
);
5549 && !src_plats
->aggs_bottom
5550 && (agg_jf_preserved
|| !src_plats
->aggs_by_ref
))
5552 if (interim
&& self_recursive_pass_through_p (cs
, jfunc
, index
))
5554 interim
->push_adjusted_values (src_idx
, index
, unit_delta
,
5558 if (!src_plats
->aggs_contain_variable
)
5560 push_agg_values_from_plats (src_plats
, index
, unit_delta
,
5568 if (!jfunc
->agg
.items
)
5571 unsigned prev_unit_offset
= 0;
5572 for (const ipa_agg_jf_item
&agg_jf
: *jfunc
->agg
.items
)
5574 tree value
, srcvalue
;
5575 /* Besides simple pass-through aggregate jump function, arithmetic
5576 aggregate jump function could also bring same aggregate value as
5577 parameter passed-in for self-feeding recursive call. For example,
5585 Given that *i is 0, recursive propagation via (*i & 1) also gets 0. */
5587 && self_recursive_agg_pass_through_p (cs
, &agg_jf
, index
, false)
5588 && (srcvalue
= interim
->get_value(index
,
5589 agg_jf
.offset
/ BITS_PER_UNIT
)))
5590 value
= ipa_get_jf_arith_result (agg_jf
.value
.pass_through
.operation
,
5592 agg_jf
.value
.pass_through
.operand
,
5595 value
= ipa_agg_value_from_jfunc (caller_info
, cs
->caller
,
5599 struct ipa_argagg_value iav
;
5601 iav
.unit_offset
= agg_jf
.offset
/ BITS_PER_UNIT
;
5603 iav
.by_ref
= jfunc
->agg
.by_ref
;
5607 || iav
.unit_offset
> prev_unit_offset
);
5608 prev_unit_offset
= iav
.unit_offset
;
5611 res
->safe_push (iav
);
/* NOTE(review): garbled extraction — missing lines include the `static void`
   line, the early return when ARGS is unavailable (after 5632), the
   `interim_index++` body of the while loop, `continue` statements, the
   trailing arguments of the call at 5657-5658 and braces.  Visible logic:
   for each parameter index passed on CS (optionally skipping indices absent
   from INTERIM), delegate to push_agg_values_for_index_from_edge unless the
   parameter is unused or its aggregate lattice is bottom.  Restore from
   upstream before compiling; text verbatim.  */
5617 /* Push all aggregate values coming along edge CS to RES. DEST_INFO is the
5618 description of ultimate callee of CS or the one it was cloned from (the
5619 summary where lattices are). If INTERIM is non-NULL, it contains the
5620 current interim state of collected aggregate values which can be used to
5621 compute values passed over self-recursive edges (if OPTIMIZE_SELF_RECURSION
5622 is true) and to skip values which clearly will not be part of intersection
5626 push_agg_values_from_edge (struct cgraph_edge
*cs
,
5627 ipa_node_params
*dest_info
,
5628 vec
<ipa_argagg_value
> *res
,
5629 const ipa_argagg_value_list
*interim
,
5630 bool optimize_self_recursion
)
5632 ipa_edge_args
*args
= ipa_edge_args_sum
->get (cs
);
5636 int count
= MIN (ipa_get_param_count (dest_info
),
5637 ipa_get_cs_argument_count (args
));
5639 unsigned interim_index
= 0;
5640 for (int index
= 0; index
< count
; index
++)
5644 while (interim_index
< interim
->m_elts
.size ()
5645 && interim
->m_elts
[interim_index
].value
5646 && interim
->m_elts
[interim_index
].index
< index
)
5648 if (interim_index
>= interim
->m_elts
.size ()
5649 || interim
->m_elts
[interim_index
].index
> index
)
5653 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (dest_info
, index
);
5654 if (!ipa_is_param_used (dest_info
, index
)
5655 || plats
->aggs_bottom
)
5657 push_agg_values_for_index_from_edge (cs
, index
, res
,
5658 optimize_self_recursion
? interim
/* NOTE(review): garbled extraction — missing lines include the early return
   when no valid entries remain (around 5681-5683 and 5692-5695), the
   conditional guarding `res->quick_push` (5699), the final `return res;`
   and braces.  Visible logic: seed `interim` from the first caller edge,
   intersect it with the aggregate values arriving on every other caller via
   intersect_argaggs_with, and copy the surviving entries into a GC vector.
   Restore from upstream before compiling; text verbatim.  */
5664 /* Look at edges in CALLERS and collect all known aggregate values that arrive
5665 from all of them. Return nullptr if there are none. */
5667 static struct vec
<ipa_argagg_value
, va_gc
> *
5668 find_aggregate_values_for_callers_subset (struct cgraph_node
*node
,
5669 const vec
<cgraph_edge
*> &callers
)
5671 ipa_node_params
*dest_info
= ipa_node_params_sum
->get (node
);
5672 if (dest_info
->ipcp_orig_node
)
5673 dest_info
= ipa_node_params_sum
->get (dest_info
->ipcp_orig_node
);
5675 /* gather_edges_for_value puts a non-recursive call into the first element of
5676 callers if it can. */
5677 auto_vec
<ipa_argagg_value
, 32> interim
;
5678 push_agg_values_from_edge (callers
[0], dest_info
, &interim
, NULL
, true);
5680 unsigned valid_entries
= interim
.length ();
5684 unsigned caller_count
= callers
.length();
5685 for (unsigned i
= 1; i
< caller_count
; i
++)
5687 auto_vec
<ipa_argagg_value
, 32> last
;
5688 ipa_argagg_value_list
avs (&interim
);
5689 push_agg_values_from_edge (callers
[i
], dest_info
, &last
, &avs
, true);
5691 valid_entries
= intersect_argaggs_with (interim
, last
);
5696 vec
<ipa_argagg_value
, va_gc
> *res
= NULL
;
5697 vec_safe_reserve_exact (res
, valid_entries
);
5698 for (const ipa_argagg_value
&av
: interim
)
5700 res
->quick_push(av
);
5701 gcc_checking_assert (res
->length () == valid_entries
);
/* Determine whether CS also brings all scalar values that the NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs,
					 struct cgraph_node *node)
{
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  int count = ipa_get_param_count (dest_info);
  class ipa_node_params *caller_info;
  class ipa_edge_args *args;
  int i;

  caller_info = ipa_node_params_sum->get (cs->caller);
  args = ipa_edge_args_sum->get (cs);
  for (i = 0; i < count; i++)
    {
      struct ipa_jump_func *jump_func;
      tree val, t;

      /* known_csts holds the constants this clone was specialized for; a
	 NULL entry means the parameter was not specialized on.  */
      val = dest_info->known_csts[i];
      if (!val)
	continue;

      /* The edge must actually pass an argument for this parameter.  */
      if (i >= ipa_get_cs_argument_count (args))
	return false;
      jump_func = ipa_get_ith_jump_func (args, i);
      t = ipa_value_from_jfunc (caller_info, jump_func,
				ipa_get_type (dest_info, i));
      if (!t || !values_equal_for_ipcp_p (val, t))
	return false;
    }
  return true;
}
/* Determine whether CS also brings all aggregate values that NODE is
   specialized for.  */

static bool
cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs,
					  struct cgraph_node *node)
{
  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
  /* No aggregate replacements means there is nothing the edge could fail to
     provide.  */
  if (!ts || vec_safe_is_empty (ts->m_agg_values))
    return true;

  const ipa_argagg_value_list existing (ts->m_agg_values);
  auto_vec<ipa_argagg_value, 32> edge_values;
  ipa_node_params *dest_info = ipa_node_params_sum->get (node);
  /* NODE is expected to be a clone; lattices are on the original node.  */
  gcc_checking_assert (dest_info->ipcp_orig_node);
  dest_info = ipa_node_params_sum->get (dest_info->ipcp_orig_node);
  push_agg_values_from_edge (cs, dest_info, &edge_values, &existing, false);
  const ipa_argagg_value_list avl (&edge_values);
  return avl.superset_of_p (existing);
}
/* Given an original NODE and a VAL for which we have already created a
   specialized clone, look whether there are incoming edges that still lead
   into the old node but now also bring the requested value and also conform to
   all other criteria such that they can be redirected the special node.
   This function can therefore redirect the final edge in a SCC.  */

template <typename valtype>
static void
perhaps_add_new_callers (cgraph_node *node, ipcp_value<valtype> *val)
{
  ipcp_value_source<valtype> *src;
  profile_count redirected_sum = profile_count::zero ();

  for (src = val->sources; src; src = src->next)
    {
      /* Walk the source edge and all its clones.  */
      struct cgraph_edge *cs = src->cs;
      while (cs)
	{
	  if (cgraph_edge_brings_value_p (cs, src, node, val)
	      && cgraph_edge_brings_all_scalars_for_node (cs, val->spec_node)
	      && cgraph_edge_brings_all_agg_vals_for_node (cs, val->spec_node))
	    {
	      if (dump_file)
		fprintf (dump_file, " - adding an extra caller %s of %s\n",
			 cs->caller->dump_name (),
			 val->spec_node->dump_name ());

	      cs->redirect_callee_duplicating_thunks (val->spec_node);
	      val->spec_node->expand_all_artificial_thunks ();
	      /* Accumulate the profile count being moved over to the clone so
		 that profiles can be fixed up afterwards.  */
	      if (cs->count.ipa ().initialized_p ())
		redirected_sum = redirected_sum + cs->count.ipa ();
	    }
	  cs = get_next_cgraph_edge_clone (cs);
	}
    }

  if (redirected_sum.nonzero_p ())
    update_specialized_profile (val->spec_node, node, redirected_sum);
}
5801 /* Return true if KNOWN_CONTEXTS contain at least one useful context. */
5804 known_contexts_useful_p (vec
<ipa_polymorphic_call_context
> known_contexts
)
5806 ipa_polymorphic_call_context
*ctx
;
5809 FOR_EACH_VEC_ELT (known_contexts
, i
, ctx
)
5810 if (!ctx
->useless_p ())
5815 /* Return a copy of KNOWN_CSTS if it is not empty, otherwise return vNULL. */
5817 static vec
<ipa_polymorphic_call_context
>
5818 copy_useful_known_contexts (const vec
<ipa_polymorphic_call_context
> &known_contexts
)
5820 if (known_contexts_useful_p (known_contexts
))
5821 return known_contexts
.copy ();
5826 /* Copy known scalar values from AVALS into KNOWN_CSTS and modify the copy
5827 according to VAL and INDEX. If non-empty, replace KNOWN_CONTEXTS with its
5831 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
5832 vec
<tree
> *known_csts
,
5833 vec
<ipa_polymorphic_call_context
> *known_contexts
,
5834 ipcp_value
<tree
> *val
, int index
)
5836 *known_csts
= avals
->m_known_vals
.copy ();
5837 *known_contexts
= copy_useful_known_contexts (avals
->m_known_contexts
);
5838 (*known_csts
)[index
] = val
->value
;
5841 /* Copy known scalar values from AVALS into KNOWN_CSTS. Similarly, copy
5842 contexts to KNOWN_CONTEXTS and modify the copy according to VAL and
5846 copy_known_vectors_add_val (ipa_auto_call_arg_values
*avals
,
5847 vec
<tree
> *known_csts
,
5848 vec
<ipa_polymorphic_call_context
> *known_contexts
,
5849 ipcp_value
<ipa_polymorphic_call_context
> *val
,
5852 *known_csts
= avals
->m_known_vals
.copy ();
5853 *known_contexts
= avals
->m_known_contexts
.copy ();
5854 (*known_contexts
)[index
] = val
->value
;
5857 /* Return true if OFFSET indicates this was not an aggregate value or there is
5858 a replacement equivalent to VALUE, INDEX and OFFSET among those in the
5862 ipcp_val_agg_replacement_ok_p (vec
<ipa_argagg_value
, va_gc
> *aggvals
,
5863 int index
, HOST_WIDE_INT offset
, tree value
)
5868 const ipa_argagg_value_list
avl (aggvals
);
5869 tree v
= avl
.get_value (index
, offset
/ BITS_PER_UNIT
);
5870 return v
&& values_equal_for_ipcp_p (v
, value
);
5873 /* Return true if offset is minus one because source of a polymorphic context
5874 cannot be an aggregate value. */
5877 ipcp_val_agg_replacement_ok_p (vec
<ipa_argagg_value
, va_gc
> *,
5878 int , HOST_WIDE_INT offset
,
5879 ipa_polymorphic_call_context
)
5881 return offset
== -1;
/* Decide whether to create a special version of NODE for value VAL of
   parameter at the given INDEX.  If OFFSET is -1, the value is for the
   parameter itself, otherwise it is stored at the given OFFSET of the
   parameter.  AVALS describes the other already known values.  SELF_GEN_CLONES
   is a vector which contains clones created for self-recursive calls with an
   arithmetic pass-through jump function.  Return true if a clone was
   created.  */

template <typename valtype>
static bool
decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
		    ipcp_value<valtype> *val, ipa_auto_call_arg_values *avals,
		    vec<cgraph_node *> *self_gen_clones)
{
  int caller_count;
  sreal freq_sum;
  profile_count count_sum, rec_count_sum;
  vec<cgraph_edge *> callers;

  if (val->spec_node)
    {
      /* A clone for this value already exists; just try to redirect any
	 remaining edges that now qualify.  */
      perhaps_add_new_callers (node, val);
      return false;
    }
  else if (val->local_size_cost + overall_size > get_max_overall_size (node))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "   Ignoring candidate value because "
		 "maximum unit size would be reached with %li.\n",
		 val->local_size_cost + overall_size);
      return false;
    }
  else if (!get_info_about_necessary_edges (val, node, &freq_sum, &caller_count,
					    &rec_count_sum, &count_sum))
    return false;

  if (!dbg_cnt (ipa_cp_values))
    return false;

  if (val->self_recursion_generated_p ())
    {
      /* The edge counts in this case might not have been adjusted yet.
	 Nevertheless, even if they were it would be only a guesswork which we
	 can do now.  The recursive part of the counts can be derived from the
	 count of the original node anyway.  */
      if (node->count.ipa ().nonzero_p ())
	{
	  unsigned dem = self_gen_clones->length () + 1;
	  rec_count_sum = node->count.ipa () / dem;
	}
      else
	rec_count_sum = profile_count::zero ();
    }

  /* get_info_about_necessary_edges only sums up ipa counts.  */
  count_sum += rec_count_sum;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " - considering value ");
      print_ipcp_constant_value (dump_file, val->value);
      fprintf (dump_file, " for ");
      ipa_dump_param (dump_file, ipa_node_params_sum->get (node), index);
      if (offset != -1)
	fprintf (dump_file, ", offset: " HOST_WIDE_INT_PRINT_DEC, offset);
      fprintf (dump_file, " (caller_count: %i)\n", caller_count);
    }

  /* Check both the benefit estimated for this node alone and the benefit
     including effects propagated to callees.  */
  if (!good_cloning_opportunity_p (node, val->local_time_benefit,
				   freq_sum, count_sum,
				   val->local_size_cost)
      && !good_cloning_opportunity_p (node, val->prop_time_benefit,
				      freq_sum, count_sum, val->prop_size_cost))
    return false;

  if (dump_file)
    fprintf (dump_file, "  Creating a specialized node of %s.\n",
	     node->dump_name ());

  vec<tree> known_csts;
  vec<ipa_polymorphic_call_context> known_contexts;

  callers = gather_edges_for_value (val, node, caller_count);
  if (offset == -1)
    copy_known_vectors_add_val (avals, &known_csts, &known_contexts, val, index);
  else
    {
      /* Aggregate values are not placed into the scalar vectors; only make
	 plain copies here.  */
      known_csts = avals->m_known_vals.copy ();
      known_contexts = copy_useful_known_contexts (avals->m_known_contexts);
    }
  find_more_scalar_values_for_callers_subset (node, known_csts, callers);
  find_more_contexts_for_caller_subset (node, &known_contexts, callers);
  vec<ipa_argagg_value, va_gc> *aggvals
    = find_aggregate_values_for_callers_subset (node, callers);
  gcc_checking_assert (ipcp_val_agg_replacement_ok_p (aggvals, index,
						      offset, val->value));
  val->spec_node = create_specialized_node (node, known_csts, known_contexts,
					    aggvals, callers);

  if (val->self_recursion_generated_p ())
    self_gen_clones->safe_push (val->spec_node);
  else
    update_profiling_info (node, val->spec_node);

  callers.release ();
  overall_size += val->local_size_cost;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "     overall size reached %li\n",
	     overall_size);

  /* TODO: If for some lattice there is only one other known value
     left, make a special node for it too. */

  return true;
}
5999 /* Like irange::contains_p(), but convert VAL to the range of R if
6003 ipa_range_contains_p (const vrange
&r
, tree val
)
6005 if (r
.undefined_p ())
6008 tree type
= r
.type ();
6009 if (!wi::fits_to_tree_p (wi::to_wide (val
), type
))
6012 val
= fold_convert (type
, val
);
6013 return r
.contains_p (val
);
/* Decide whether and what specialized clones of NODE should be created.
   Return true if any clone was created.  */

static bool
decide_whether_version_node (struct cgraph_node *node)
{
  ipa_node_params *info = ipa_node_params_sum->get (node);
  int i, count = ipa_get_param_count (info);
  bool ret = false;

  if (count == 0)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\nEvaluating opportunities for %s.\n",
	     node->dump_name ());

  auto_vec <cgraph_node *, 9> self_gen_clones;
  ipa_auto_call_arg_values avals;
  gather_context_independent_values (info, &avals, false, NULL);

  for (i = 0; i < count;i++)
    {
      if (!ipa_is_param_used (info, i))
	continue;

      class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i);
      ipcp_lattice<tree> *lat = &plats->itself;
      ipcp_lattice<ipa_polymorphic_call_context> *ctxlat = &plats->ctxlat;

      /* Scalar constants: only worth considering when the lattice is not
	 bottom and no single context-independent value is already known.  */
      if (!lat->bottom
	  && !avals.m_known_vals[i])
	{
	  ipcp_value<tree> *val;
	  for (val = lat->values; val; val = val->next)
	    {
	      /* If some values generated for self-recursive calls with
		 arithmetic jump functions fall outside of the known
		 range for the parameter, we can skip them.  */
	      if (TREE_CODE (val->value) == INTEGER_CST
		  && !plats->m_value_range.bottom_p ()
		  && !ipa_range_contains_p (plats->m_value_range.m_vr,
					    val->value))
		{
		  /* This can happen also if a constant present in the source
		     code falls outside of the range of parameter's type, so we
		     cannot just assert that it does not.  */
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, " - skipping%s value ",
			       val->self_recursion_generated_p ()
			       ? " self_recursion_generated" : "");
		      print_ipcp_constant_value (dump_file, val->value);
		      fprintf (dump_file, " because it is outside known "
			       "value range.\n");
		    }
		  continue;
		}
	      ret |= decide_about_value (node, i, -1, val, &avals,
					 &self_gen_clones);
	    }
	}

      /* Aggregate values passed in this parameter.  */
      if (!plats->aggs_bottom)
	{
	  struct ipcp_agg_lattice *aglat;
	  ipcp_value<tree> *val;
	  for (aglat = plats->aggs; aglat; aglat = aglat->next)
	    if (!aglat->bottom && aglat->values
		/* If the following is false, the one value has been considered
		   for cloning for all contexts.  */
		&& (plats->aggs_contain_variable
		    || !aglat->is_single_const ()))
	      for (val = aglat->values; val; val = val->next)
		ret |= decide_about_value (node, i, aglat->offset, val, &avals,
					   &self_gen_clones);
	}

      /* Polymorphic call contexts.  */
      if (!ctxlat->bottom
	  && avals.m_known_contexts[i].useless_p ())
	{
	  ipcp_value<ipa_polymorphic_call_context> *val;
	  for (val = ctxlat->values; val; val = val->next)
	    ret |= decide_about_value (node, i, -1, val, &avals,
				       &self_gen_clones);
	}
    }

  if (!self_gen_clones.is_empty ())
    {
      self_gen_clones.safe_push (node);
      update_counts_for_self_gen_clones (node, self_gen_clones);
    }

  if (info->do_clone_for_all_contexts)
    {
      if (!dbg_cnt (ipa_cp_values))
	{
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      struct cgraph_node *clone;
      auto_vec<cgraph_edge *> callers = node->collect_callers ();

      /* Drop edges coming from dead callers; iterate backwards because
	 unordered_remove replaces the element with the last one.  */
      for (int i = callers.length () - 1; i >= 0; i--)
	{
	  cgraph_edge *cs = callers[i];
	  ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);

	  if (caller_info && caller_info->node_dead)
	    callers.unordered_remove (i);
	}

      if (!adjust_callers_for_value_intersection (callers, node))
	{
	  /* If node is not called by anyone, or all its caller edges are
	     self-recursive, the node is not really in use, no need to do
	     cloning.  */
	  info->do_clone_for_all_contexts = false;
	  return ret;
	}

      if (dump_file)
	fprintf (dump_file, " - Creating a specialized node of %s "
		 "for all known contexts.\n", node->dump_name ());

      vec<tree> known_csts = avals.m_known_vals.copy ();
      vec<ipa_polymorphic_call_context> known_contexts
	= copy_useful_known_contexts (avals.m_known_contexts);
      find_more_scalar_values_for_callers_subset (node, known_csts, callers);
      find_more_contexts_for_caller_subset (node, &known_contexts, callers);
      vec<ipa_argagg_value, va_gc> *aggvals
	= find_aggregate_values_for_callers_subset (node, callers);

      if (!known_contexts_useful_p (known_contexts))
	{
	  known_contexts.release ();
	  known_contexts = vNULL;
	}
      clone = create_specialized_node (node, known_csts, known_contexts,
				       aggvals, callers);
      info->do_clone_for_all_contexts = false;
      ipa_node_params_sum->get (clone)->is_all_contexts_clone = true;
      ret = true;
    }

  return ret;
}
6165 /* Transitively mark all callees of NODE within the same SCC as not dead. */
6168 spread_undeadness (struct cgraph_node
*node
)
6170 struct cgraph_edge
*cs
;
6172 for (cs
= node
->callees
; cs
; cs
= cs
->next_callee
)
6173 if (ipa_edge_within_scc (cs
))
6175 struct cgraph_node
*callee
;
6176 class ipa_node_params
*info
;
6178 callee
= cs
->callee
->function_symbol (NULL
);
6179 info
= ipa_node_params_sum
->get (callee
);
6181 if (info
&& info
->node_dead
)
6183 info
->node_dead
= 0;
6184 spread_undeadness (callee
);
/* Return true if NODE has a caller from outside of its SCC that is not
   dead.  Worker callback for cgraph_for_node_and_aliases.  */

static bool
has_undead_caller_from_outside_scc_p (struct cgraph_node *node,
				      void *data ATTRIBUTE_UNUSED)
{
  struct cgraph_edge *cs;

  for (cs = node->callers; cs; cs = cs->next_caller)
    /* Calls through a thunk are examined recursively via the thunk's own
       callers.  */
    if (cs->caller->thunk
	&& cs->caller->call_for_symbol_thunks_and_aliases
	  (has_undead_caller_from_outside_scc_p, NULL, true))
      return true;
    else if (!ipa_edge_within_scc (cs))
      {
	ipa_node_params *caller_info = ipa_node_params_sum->get (cs->caller);
	if (!caller_info /* Unoptimized callers are like dead ones.  */
	    || !caller_info->node_dead)
	  return true;
      }
  return false;
}
/* Identify nodes within the same SCC as NODE which are no longer needed
   because of new clones and will be removed as unreachable.  */

static void
identify_dead_nodes (struct cgraph_node *node)
{
  struct cgraph_node *v;
  /* First pass: tentatively mark local nodes with no live caller outside the
     SCC as dead.  The SCC members are chained via aux->next_cycle.  */
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    if (v->local)
      {
	ipa_node_params *info = ipa_node_params_sum->get (v);
	if (info
	    && !v->call_for_symbol_thunks_and_aliases
	      (has_undead_caller_from_outside_scc_p, NULL, true))
	  info->node_dead = 1;
      }

  /* Second pass: anything reachable from a live node within the SCC is live
     after all.  */
  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
    {
      ipa_node_params *info = ipa_node_params_sum->get (v);
      if (info && !info->node_dead)
	spread_undeadness (v);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	if (ipa_node_params_sum->get (v)
	    && ipa_node_params_sum->get (v)->node_dead)
	  fprintf (dump_file, "  Marking node as dead: %s.\n",
		   v->dump_name ());
    }
}
/* The decision stage.  Iterate over the topological order of call graph nodes
   TOPO and make specialized clones if deemed beneficial.  */

static void
ipcp_decision_stage (class ipa_topo_info *topo)
{
  int i;

  if (dump_file)
    fprintf (dump_file, "\nIPA decision stage:\n\n");

  for (i = topo->nnodes - 1; i >= 0; i--)
    {
      struct cgraph_node *node = topo->order[i];
      bool change = false, iterate = true;

      /* Iterate to a fixed point: cloning one member of an SCC can enable
	 cloning opportunities in other members.  */
      while (iterate)
	{
	  struct cgraph_node *v;
	  iterate = false;
	  for (v = node; v; v = ((struct ipa_dfs_info *) v->aux)->next_cycle)
	    if (v->has_gimple_body_p ()
		&& ipcp_versionable_function_p (v))
	      iterate |= decide_whether_version_node (v);

	  change |= iterate;
	}

      if (change)
	identify_dead_nodes (node);
    }
}
6280 /* Look up all VR and bits information that we have discovered and copy it
6281 over to the transformation summary. */
6284 ipcp_store_vr_results (void)
6288 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6290 ipa_node_params
*info
= ipa_node_params_sum
->get (node
);
6291 bool dumped_sth
= false;
6292 bool found_useful_result
= false;
6294 bool do_bits
= true;
6296 if (!info
|| !opt_for_fn (node
->decl
, flag_ipa_vrp
))
6299 fprintf (dump_file
, "Not considering %s for VR discovery "
6300 "and propagate; -fipa-ipa-vrp: disabled.\n",
6301 node
->dump_name ());
6304 if (!info
|| !opt_for_fn (node
->decl
, flag_ipa_bit_cp
))
6307 fprintf (dump_file
, "Not considering %s for ipa bitwise "
6308 "propagation ; -fipa-bit-cp: disabled.\n",
6309 node
->dump_name ());
6312 if (!do_bits
&& !do_vr
)
6315 if (info
->ipcp_orig_node
)
6316 info
= ipa_node_params_sum
->get (info
->ipcp_orig_node
);
6317 if (info
->lattices
.is_empty ())
6318 /* Newly expanded artificial thunks do not have lattices. */
6321 unsigned count
= ipa_get_param_count (info
);
6322 for (unsigned i
= 0; i
< count
; i
++)
6324 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6326 && !plats
->m_value_range
.bottom_p ()
6327 && !plats
->m_value_range
.top_p ())
6329 found_useful_result
= true;
6332 if (do_bits
&& plats
->bits_lattice
.constant_p ())
6334 found_useful_result
= true;
6338 if (!found_useful_result
)
6341 ipcp_transformation_initialize ();
6342 ipcp_transformation
*ts
= ipcp_transformation_sum
->get_create (node
);
6343 vec_safe_reserve_exact (ts
->m_vr
, count
);
6345 for (unsigned i
= 0; i
< count
; i
++)
6347 ipcp_param_lattices
*plats
= ipa_get_parm_lattices (info
, i
);
6348 ipcp_bits_lattice
*bits
= NULL
;
6351 && plats
->bits_lattice
.constant_p ()
6352 && dbg_cnt (ipa_cp_bits
))
6353 bits
= &plats
->bits_lattice
;
6356 && !plats
->m_value_range
.bottom_p ()
6357 && !plats
->m_value_range
.top_p ()
6358 && dbg_cnt (ipa_cp_vr
))
6362 value_range tmp
= plats
->m_value_range
.m_vr
;
6363 tree type
= ipa_get_type (info
, i
);
6364 irange_bitmask
bm (wide_int::from (bits
->get_value (),
6365 TYPE_PRECISION (type
),
6367 wide_int::from (bits
->get_mask (),
6368 TYPE_PRECISION (type
),
6370 tmp
.update_bitmask (bm
);
6372 ts
->m_vr
->quick_push (vr
);
6376 ipa_vr
vr (plats
->m_value_range
.m_vr
);
6377 ts
->m_vr
->quick_push (vr
);
6382 tree type
= ipa_get_type (info
, i
);
6384 tmp
.set_varying (type
);
6385 irange_bitmask
bm (wide_int::from (bits
->get_value (),
6386 TYPE_PRECISION (type
),
6388 wide_int::from (bits
->get_mask (),
6389 TYPE_PRECISION (type
),
6391 tmp
.update_bitmask (bm
);
6393 ts
->m_vr
->quick_push (vr
);
6398 ts
->m_vr
->quick_push (vr
);
6401 if (!dump_file
|| !bits
)
6406 fprintf (dump_file
, "Propagated bits info for function %s:\n",
6407 node
->dump_name ());
6410 fprintf (dump_file
, " param %i: value = ", i
);
6411 print_hex (bits
->get_value (), dump_file
);
6412 fprintf (dump_file
, ", mask = ");
6413 print_hex (bits
->get_mask (), dump_file
);
6414 fprintf (dump_file
, "\n");
/* The IPCP driver.  */

static unsigned int
ipcp_driver (void)
{
  class ipa_topo_info topo;

  if (edge_clone_summaries == NULL)
    edge_clone_summaries = new edge_clone_summary_t (symtab);

  ipa_check_create_node_params ();
  ipa_check_create_edge_args ();
  clone_num_suffixes = new hash_map<const char *, unsigned>;

  if (dump_file)
    {
      fprintf (dump_file, "\nIPA structures before propagation:\n");
      if (dump_flags & TDF_DETAILS)
	ipa_print_all_params (dump_file);
      ipa_print_all_jump_functions (dump_file);
    }

  /* Topological sort.  */
  build_toporder_info (&topo);
  /* Do the interprocedural propagation.  */
  ipcp_propagate_stage (&topo);
  /* Decide what constant propagation and cloning should be performed.  */
  ipcp_decision_stage (&topo);
  /* Store results of value range and bits propagation.  */
  ipcp_store_vr_results ();

  /* Free all IPCP structures.  */
  delete clone_num_suffixes;
  free_toporder_info (&topo);
  delete edge_clone_summaries;
  edge_clone_summaries = NULL;
  ipa_free_all_structures_after_ipa_cp ();
  if (dump_file)
    fprintf (dump_file, "\nIPA constant propagation end\n");
  return 0;
}
6461 /* Initialization and computation of IPCP data structures. This is the initial
6462 intraprocedural analysis of functions, which gathers information to be
6463 propagated later on. */
6466 ipcp_generate_summary (void)
6468 struct cgraph_node
*node
;
6471 fprintf (dump_file
, "\nIPA constant propagation start:\n");
6472 ipa_register_cgraph_hooks ();
6474 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node
)
6475 ipa_analyze_node (node
);
/* Pass descriptor for the IPA-CP pass.  NOTE(review): the name-field line was
   lost in extraction; "cp" is assumed from the pass's dump-file name — please
   confirm against the repository.  */
const pass_data pass_data_ipa_cp =
{
  IPA_PASS, /* type */
  "cp", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_CONSTANT_PROP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_dump_symtab | TODO_remove_functions ), /* todo_flags_finish */
};
/* The IPA-CP pass object; wires the stage callbacks into the pass manager.  */
class pass_ipa_cp : public ipa_opt_pass_d
{
public:
  pass_ipa_cp (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_cp, ctxt,
		      ipcp_generate_summary, /* generate_summary */
		      NULL, /* write_summary */
		      NULL, /* read_summary */
		      ipcp_write_transformation_summaries, /*
		      write_optimization_summary */
		      ipcp_read_transformation_summaries, /*
		      read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      ipcp_transform_function, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
  {
    /* FIXME: We should remove the optimize check after we ensure we never run
       IPA passes when not optimizing.  */
    return (flag_ipa_cp && optimize) || in_lto_p;
  }

  unsigned int execute (function *) final override { return ipcp_driver (); }

}; // class pass_ipa_cp
/* Factory function used by the pass manager to instantiate the pass.  */
ipa_opt_pass_d *
make_pass_ipa_cp (gcc::context *ctxt)
{
  return new pass_ipa_cp (ctxt);
}
/* Reset all state within ipa-cp.cc so that we can rerun the compiler
   within the same process.  For use by toplev::finalize.  */

void
ipa_cp_cc_finalize (void)
{
  base_count = profile_count::uninitialized ();
  /* NOTE(review): the line resetting overall_size was lost in extraction and
     has been reconstructed — confirm against the repository.  */
  overall_size = 0;
  orig_overall_size = 0;
  ipcp_free_transformation_sum ();
}
/* Given PARAM which must be a parameter of function FNDECL described by THIS,
   return its index in the DECL_ARGUMENTS chain, using a pre-computed
   DECL_UID-sorted vector if available (which is pre-computed only if there are
   many parameters).  Can return -1 if param is static chain not represented
   among DECL_ARGUMENTS.  */

int
ipcp_transformation::get_param_index (const_tree fndecl, const_tree param) const
{
  gcc_assert (TREE_CODE (param) == PARM_DECL);
  if (m_uid_to_idx)
    {
      /* Fast path: binary search in the DECL_UID-sorted map.  */
      unsigned puid = DECL_UID (param);
      const ipa_uid_to_idx_map_elt *res
	= std::lower_bound (m_uid_to_idx->begin(), m_uid_to_idx->end (), puid,
			    [] (const ipa_uid_to_idx_map_elt &elt, unsigned uid)
			    {
			      return elt.uid < uid;
			    });
      if (res == m_uid_to_idx->end ()
	  || res->uid != puid)
	{
	  gcc_assert (DECL_STATIC_CHAIN (fndecl));
	  return -1;
	}
      return res->index;
    }

  /* Slow path: linear walk over DECL_ARGUMENTS.  */
  unsigned index = 0;
  for (tree p = DECL_ARGUMENTS (fndecl); p; p = DECL_CHAIN (p), index++)
    if (p == param)
      return (int) index;

  gcc_assert (DECL_STATIC_CHAIN (fndecl));
  return -1;
}
6580 /* Helper function to qsort a vector of ipa_uid_to_idx_map_elt elements
6581 according to the uid. */
6584 compare_uids (const void *a
, const void *b
)
6586 const ipa_uid_to_idx_map_elt
*e1
= (const ipa_uid_to_idx_map_elt
*) a
;
6587 const ipa_uid_to_idx_map_elt
*e2
= (const ipa_uid_to_idx_map_elt
*) b
;
6588 if (e1
->uid
< e2
->uid
)
6590 if (e1
->uid
> e2
->uid
)
6595 /* Assuming THIS describes FNDECL and it has sufficiently many parameters to
6596 justify the overhead, create a DECL_UID-sorted vector to speed up mapping
6597 from parameters to their indices in DECL_ARGUMENTS chain. */
6600 ipcp_transformation::maybe_create_parm_idx_map (tree fndecl
)
6602 int c
= count_formal_params (fndecl
);
6606 m_uid_to_idx
= NULL
;
6607 vec_safe_reserve (m_uid_to_idx
, c
, true);
6609 for (tree p
= DECL_ARGUMENTS (fndecl
); p
; p
= DECL_CHAIN (p
), index
++)
6611 ipa_uid_to_idx_map_elt elt
;
6612 elt
.uid
= DECL_UID (p
);
6614 m_uid_to_idx
->quick_push (elt
);
6616 m_uid_to_idx
->qsort (compare_uids
);