libstdc++: Refactor loops in std::__platform_semaphore
[official-gcc.git] / gcc / ipa-fnsummary.cc
blobb382478340659e7c500a9a9fd82b7125f5b06599
1 /* Function summary pass.
2 Copyright (C) 2003-2024 Free Software Foundation, Inc.
3 Contributed by Jan Hubicka
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Analysis of function bodies used by inter-procedural passes
23 We estimate for each function
24 - function body size and size after specializing into given context
25 - average function execution time in a given context
26 - function frame size
27 For each call
28 - call statement size, time and how often the parameters change
30 ipa_fn_summary data structures store above information locally (i.e.
31 parameters of the function itself) and globally (i.e. parameters of
32 the function created by applying all the inline decisions already
33 present in the callgraph).
35 We provide access to the ipa_fn_summary data structure and
36 basic logic updating the parameters when inlining is performed.
38 The summaries are context sensitive. Context means
39 1) partial assignment of known constant values of operands
40 2) whether function is inlined into the call or not.
41 It is easy to add more variants. To represent function size and time
42 that depends on context (i.e. it is known to be optimized away when
43 context is known either by inlining or from IP-CP and cloning),
44 we use predicates.
46 estimate_edge_size_and_time can be used to query
47 function size/time in the given context. ipa_merge_fn_summary_after_inlining merges
48 properties of caller and callee after inlining.
50 Finally pass_inline_parameters is exported. This is used to drive
51 computation of function parameters used by the early inliner. IPA
52 inlined performs analysis via its analyze_function method. */
54 #include "config.h"
55 #define INCLUDE_VECTOR
56 #include "system.h"
57 #include "coretypes.h"
58 #include "backend.h"
59 #include "target.h"
60 #include "tree.h"
61 #include "gimple.h"
62 #include "alloc-pool.h"
63 #include "tree-pass.h"
64 #include "ssa.h"
65 #include "tree-streamer.h"
66 #include "cgraph.h"
67 #include "diagnostic.h"
68 #include "fold-const.h"
69 #include "print-tree.h"
70 #include "tree-inline.h"
71 #include "gimple-pretty-print.h"
72 #include "cfganal.h"
73 #include "gimple-iterator.h"
74 #include "tree-cfg.h"
75 #include "tree-ssa-loop-niter.h"
76 #include "tree-ssa-loop.h"
77 #include "symbol-summary.h"
78 #include "sreal.h"
79 #include "ipa-cp.h"
80 #include "ipa-prop.h"
81 #include "ipa-fnsummary.h"
82 #include "cfgloop.h"
83 #include "tree-scalar-evolution.h"
84 #include "ipa-utils.h"
85 #include "cfgexpand.h"
86 #include "gimplify.h"
87 #include "stringpool.h"
88 #include "attribs.h"
89 #include "tree-into-ssa.h"
90 #include "symtab-clones.h"
91 #include "gimple-range.h"
92 #include "tree-dfa.h"
94 /* Summaries. */
95 fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries;
96 fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries;
97 fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries;
99 /* Edge predicates goes here. */
100 static object_allocator<ipa_predicate> edge_predicate_pool ("edge predicates");
103 /* Dump IPA hints. */
104 void
105 ipa_dump_hints (FILE *f, ipa_hints hints)
107 if (!hints)
108 return;
109 fprintf (f, "IPA hints:");
110 if (hints & INLINE_HINT_indirect_call)
112 hints &= ~INLINE_HINT_indirect_call;
113 fprintf (f, " indirect_call");
115 if (hints & INLINE_HINT_loop_iterations)
117 hints &= ~INLINE_HINT_loop_iterations;
118 fprintf (f, " loop_iterations");
120 if (hints & INLINE_HINT_loop_stride)
122 hints &= ~INLINE_HINT_loop_stride;
123 fprintf (f, " loop_stride");
125 if (hints & INLINE_HINT_same_scc)
127 hints &= ~INLINE_HINT_same_scc;
128 fprintf (f, " same_scc");
130 if (hints & INLINE_HINT_in_scc)
132 hints &= ~INLINE_HINT_in_scc;
133 fprintf (f, " in_scc");
135 if (hints & INLINE_HINT_cross_module)
137 hints &= ~INLINE_HINT_cross_module;
138 fprintf (f, " cross_module");
140 if (hints & INLINE_HINT_declared_inline)
142 hints &= ~INLINE_HINT_declared_inline;
143 fprintf (f, " declared_inline");
145 if (hints & INLINE_HINT_known_hot)
147 hints &= ~INLINE_HINT_known_hot;
148 fprintf (f, " known_hot");
150 if (hints & INLINE_HINT_builtin_constant_p)
152 hints &= ~INLINE_HINT_builtin_constant_p;
153 fprintf (f, " builtin_constant_p");
155 gcc_assert (!hints);
159 /* Record SIZE and TIME to SUMMARY.
160 The accounted code will be executed when EXEC_PRED is true.
161 When NONCONST_PRED is false the code will evaluate to constant and
162 will get optimized out in specialized clones of the function.
163 If CALL is true account to call_size_time_table rather than
164 size_time_table. */
166 void
167 ipa_fn_summary::account_size_time (int size, sreal time,
168 const ipa_predicate &exec_pred,
169 const ipa_predicate &nonconst_pred_in,
170 bool call)
172 size_time_entry *e;
173 bool found = false;
174 int i;
175 ipa_predicate nonconst_pred;
176 vec<size_time_entry> *table = call ? &call_size_time_table : &size_time_table;
178 if (exec_pred == false)
179 return;
181 nonconst_pred = nonconst_pred_in & exec_pred;
183 if (nonconst_pred == false)
184 return;
186 /* We need to create initial empty unconditional clause, but otherwise
187 we don't need to account empty times and sizes. */
188 if (!size && time == 0 && table->length ())
189 return;
191 /* Only for calls we are unaccounting what we previously recorded. */
192 gcc_checking_assert (time >= 0 || call);
194 for (i = 0; table->iterate (i, &e); i++)
195 if (e->exec_predicate == exec_pred
196 && e->nonconst_predicate == nonconst_pred)
198 found = true;
199 break;
201 if (i == max_size_time_table_size)
203 i = 0;
204 found = true;
205 e = &(*table)[0];
206 if (dump_file && (dump_flags & TDF_DETAILS))
207 fprintf (dump_file,
208 "\t\tReached limit on number of entries, "
209 "ignoring the predicate.");
211 if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size))
213 fprintf (dump_file,
214 "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:",
215 ((double) size) / ipa_fn_summary::size_scale,
216 (time.to_double ()), found ? "" : "new ");
217 exec_pred.dump (dump_file, conds, 0);
218 if (exec_pred != nonconst_pred)
220 fprintf (dump_file, " nonconst:");
221 nonconst_pred.dump (dump_file, conds);
223 else
224 fprintf (dump_file, "\n");
226 if (!found)
228 class size_time_entry new_entry;
229 new_entry.size = size;
230 new_entry.time = time;
231 new_entry.exec_predicate = exec_pred;
232 new_entry.nonconst_predicate = nonconst_pred;
233 if (call)
234 call_size_time_table.safe_push (new_entry);
235 else
236 size_time_table.safe_push (new_entry);
238 else
240 e->size += size;
241 e->time += time;
242 /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */
243 /* Tolerate small roundoff issues. */
244 if (e->time < 0)
245 e->time = 0;
249 /* We proved E to be unreachable, redirect it to __builtin_unreachable. */
251 static struct cgraph_edge *
252 redirect_to_unreachable (struct cgraph_edge *e)
254 struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL;
255 struct cgraph_node *target
256 = cgraph_node::get_create (builtin_decl_unreachable ());
258 if (e->speculative)
259 e = cgraph_edge::resolve_speculation (e, target->decl);
260 else if (!e->callee)
261 e = cgraph_edge::make_direct (e, target);
262 else
263 e->redirect_callee (target);
264 class ipa_call_summary *es = ipa_call_summaries->get (e);
265 e->inline_failed = CIF_UNREACHABLE;
266 e->count = profile_count::zero ();
267 es->call_stmt_size = 0;
268 es->call_stmt_time = 0;
269 if (callee)
270 callee->remove_symbol_and_inline_clones ();
271 return e;
274 /* Set predicate for edge E. */
276 static void
277 edge_set_predicate (struct cgraph_edge *e, ipa_predicate *predicate)
279 /* If the edge is determined to be never executed, redirect it
280 to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will
281 be optimized out. */
282 if (predicate && *predicate == false
283 /* When handling speculative edges, we need to do the redirection
284 just once. Do it always on the direct edge, so we do not
285 attempt to resolve speculation while duplicating the edge. */
286 && (!e->speculative || e->callee))
287 e = redirect_to_unreachable (e);
289 class ipa_call_summary *es = ipa_call_summaries->get (e);
290 if (predicate && *predicate != true)
292 if (!es->predicate)
293 es->predicate = edge_predicate_pool.allocate ();
294 *es->predicate = *predicate;
296 else
298 if (es->predicate)
299 edge_predicate_pool.remove (es->predicate);
300 es->predicate = NULL;
304 /* Set predicate for hint *P. */
306 static void
307 set_hint_predicate (ipa_predicate **p, ipa_predicate new_predicate)
309 if (new_predicate == false || new_predicate == true)
311 if (*p)
312 edge_predicate_pool.remove (*p);
313 *p = NULL;
315 else
317 if (!*p)
318 *p = edge_predicate_pool.allocate ();
319 **p = new_predicate;
323 /* Find if NEW_PREDICATE is already in V and if so, increment its freq.
324 Otherwise add a new item to the vector with this predicate and frerq equal
325 to add_freq, unless the number of predicates would exceed MAX_NUM_PREDICATES
326 in which case the function does nothing. */
328 static void
329 add_freqcounting_predicate (vec<ipa_freqcounting_predicate, va_gc> **v,
330 const ipa_predicate &new_predicate, sreal add_freq,
331 unsigned max_num_predicates)
333 if (new_predicate == false || new_predicate == true)
334 return;
335 ipa_freqcounting_predicate *f;
336 for (int i = 0; vec_safe_iterate (*v, i, &f); i++)
337 if (new_predicate == f->predicate)
339 f->freq += add_freq;
340 return;
342 if (vec_safe_length (*v) >= max_num_predicates)
343 /* Too many different predicates to account for. */
344 return;
346 ipa_freqcounting_predicate fcp;
347 fcp.predicate = NULL;
348 set_hint_predicate (&fcp.predicate, new_predicate);
349 fcp.freq = add_freq;
350 vec_safe_push (*v, fcp);
351 return;
354 /* Compute what conditions may or may not hold given information about
355 parameters. RET_CLAUSE returns truths that may hold in a specialized copy,
356 while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized
357 copy when called in a given context. It is a bitmask of conditions. Bit
358 0 means that condition is known to be false, while bit 1 means that condition
359 may or may not be true. These differs - for example NOT_INLINED condition
360 is always false in the second and also builtin_constant_p tests cannot use
361 the fact that parameter is indeed a constant.
363 When INLINE_P is true, assume that we are inlining. AVAL contains known
364 information about argument values. The function does not modify its content
365 and so AVALs could also be of type ipa_call_arg_values but so far all
366 callers work with the auto version and so we avoid the conversion for
367 convenience.
369 ERROR_MARK value of an argument means compile time invariant. */
371 static void
372 evaluate_conditions_for_known_args (struct cgraph_node *node,
373 bool inline_p,
374 ipa_auto_call_arg_values *avals,
375 clause_t *ret_clause,
376 clause_t *ret_nonspec_clause,
377 ipa_call_summary *es)
379 clause_t clause = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
380 clause_t nonspec_clause = 1 << ipa_predicate::not_inlined_condition;
381 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
382 int i;
383 struct condition *c;
385 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
387 tree val = NULL;
388 tree res;
389 int j;
390 struct expr_eval_op *op;
392 if (c->code == ipa_predicate::not_sra_candidate)
394 if (!inline_p
395 || !es
396 || (int)es->param.length () <= c->operand_num
397 || !es->param[c->operand_num].points_to_possible_sra_candidate)
398 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
399 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
400 continue;
403 if (c->agg_contents)
405 if (c->code == ipa_predicate::changed
406 && !c->by_ref
407 && (avals->safe_sval_at(c->operand_num) == error_mark_node))
408 continue;
410 if (tree sval = avals->safe_sval_at (c->operand_num))
411 val = ipa_find_agg_cst_from_init (sval, c->offset, c->by_ref);
412 if (!val)
414 ipa_argagg_value_list avs (avals);
415 val = avs.get_value (c->operand_num, c->offset / BITS_PER_UNIT,
416 c->by_ref);
419 else
421 val = avals->safe_sval_at (c->operand_num);
422 if (val && val == error_mark_node
423 && c->code != ipa_predicate::changed)
424 val = NULL_TREE;
427 if (!val
428 && (c->code == ipa_predicate::changed
429 || c->code == ipa_predicate::is_not_constant))
431 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
432 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
433 continue;
435 if (c->code == ipa_predicate::changed)
437 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
438 continue;
441 if (c->code == ipa_predicate::is_not_constant)
443 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
444 continue;
447 if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val)))
449 if (c->type != TREE_TYPE (val))
450 val = fold_unary (VIEW_CONVERT_EXPR, c->type, val);
451 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
453 if (!val)
454 break;
455 if (!op->val[0])
456 val = fold_unary (op->code, op->type, val);
457 else if (!op->val[1])
458 val = fold_binary (op->code, op->type,
459 op->index ? op->val[0] : val,
460 op->index ? val : op->val[0]);
461 else if (op->index == 0)
462 val = fold_ternary (op->code, op->type,
463 val, op->val[0], op->val[1]);
464 else if (op->index == 1)
465 val = fold_ternary (op->code, op->type,
466 op->val[0], val, op->val[1]);
467 else if (op->index == 2)
468 val = fold_ternary (op->code, op->type,
469 op->val[0], op->val[1], val);
470 else
471 val = NULL_TREE;
474 res = val
475 ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val)
476 : NULL;
478 if (res && integer_zerop (res))
479 continue;
480 if (res && integer_onep (res))
482 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
483 nonspec_clause
484 |= 1 << (i + ipa_predicate::first_dynamic_condition);
485 continue;
488 if (c->operand_num < (int) avals->m_known_value_ranges.length ()
489 && !c->agg_contents
490 && (!val || TREE_CODE (val) != INTEGER_CST))
492 value_range vr (avals->m_known_value_ranges[c->operand_num]);
493 if (!vr.undefined_p ()
494 && !vr.varying_p ()
495 && (TYPE_SIZE (c->type) == TYPE_SIZE (vr.type ())))
497 if (!useless_type_conversion_p (c->type, vr.type ()))
498 range_cast (vr, c->type);
500 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
502 if (vr.varying_p () || vr.undefined_p ())
503 break;
505 value_range res (op->type);
506 if (!op->val[0])
508 value_range varying (op->type);
509 varying.set_varying (op->type);
510 range_op_handler handler (op->code);
511 if (!handler
512 || !res.supports_type_p (op->type)
513 || !handler.fold_range (res, op->type, vr, varying))
514 res.set_varying (op->type);
516 else if (!op->val[1])
518 value_range op0 (TREE_TYPE (op->val[0]));
519 range_op_handler handler (op->code);
521 ipa_range_set_and_normalize (op0, op->val[0]);
523 if (!handler
524 || !res.supports_type_p (op->type)
525 || !handler.fold_range (res, op->type,
526 op->index ? op0 : vr,
527 op->index ? vr : op0))
528 res.set_varying (op->type);
530 else
531 res.set_varying (op->type);
532 vr = res;
534 if (!vr.varying_p () && !vr.undefined_p ())
536 int_range<2> res;
537 value_range val_vr (TREE_TYPE (c->val));
538 range_op_handler handler (c->code);
540 ipa_range_set_and_normalize (val_vr, c->val);
542 if (!handler
543 || !val_vr.supports_type_p (TREE_TYPE (c->val))
544 || !handler.fold_range (res, boolean_type_node, vr, val_vr))
545 res.set_varying (boolean_type_node);
547 if (res.zero_p ())
548 continue;
553 clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
554 nonspec_clause |= 1 << (i + ipa_predicate::first_dynamic_condition);
556 *ret_clause = clause;
557 if (ret_nonspec_clause)
558 *ret_nonspec_clause = nonspec_clause;
561 /* Return true if VRP will be exectued on the function.
562 We do not want to anticipate optimizations that will not happen.
564 FIXME: This can be confused with -fdisable and debug counters and thus
565 it should not be used for correctness (only to make heuristics work).
566 This means that inliner should do its own optimizations of expressions
567 that it predicts to be constant so wrong code can not be triggered by
568 builtin_constant_p. */
570 static bool
571 vrp_will_run_p (struct cgraph_node *node)
573 return (opt_for_fn (node->decl, optimize)
574 && !opt_for_fn (node->decl, optimize_debug)
575 && opt_for_fn (node->decl, flag_tree_vrp));
578 /* Similarly about FRE. */
580 static bool
581 fre_will_run_p (struct cgraph_node *node)
583 return (opt_for_fn (node->decl, optimize)
584 && !opt_for_fn (node->decl, optimize_debug)
585 && opt_for_fn (node->decl, flag_tree_fre));
588 /* Work out what conditions might be true at invocation of E.
589 Compute costs for inlined edge if INLINE_P is true.
591 Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR
592 (if non-NULL) conditions evaluated for nonspecialized clone called
593 in a given context.
595 Vectors in AVALS will be populated with useful known information about
596 argument values - information not known to have any uses will be omitted -
597 except for m_known_contexts which will only be calculated if
598 COMPUTE_CONTEXTS is true. */
600 void
601 evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p,
602 clause_t *clause_ptr,
603 clause_t *nonspec_clause_ptr,
604 ipa_auto_call_arg_values *avals,
605 bool compute_contexts)
607 struct cgraph_node *callee = e->callee->ultimate_alias_target ();
608 class ipa_fn_summary *info = ipa_fn_summaries->get (callee);
609 class ipa_edge_args *args;
610 class ipa_call_summary *es = NULL;
612 if (clause_ptr)
613 *clause_ptr = inline_p ? 0 : 1 << ipa_predicate::not_inlined_condition;
615 if (ipa_node_params_sum
616 && !e->call_stmt_cannot_inline_p
617 && (info->conds || compute_contexts)
618 && (args = ipa_edge_args_sum->get (e)) != NULL)
620 struct cgraph_node *caller;
621 class ipa_node_params *caller_parms_info, *callee_pi = NULL;
622 int i, count = ipa_get_cs_argument_count (args);
623 es = ipa_call_summaries->get (e);
625 if (count)
627 if (e->caller->inlined_to)
628 caller = e->caller->inlined_to;
629 else
630 caller = e->caller;
631 caller_parms_info = ipa_node_params_sum->get (caller);
632 callee_pi = ipa_node_params_sum->get (callee);
634 /* Watch for thunks. */
635 if (callee_pi)
636 /* Watch for variadic functions. */
637 count = MIN (count, ipa_get_param_count (callee_pi));
640 if (callee_pi)
641 for (i = 0; i < count; i++)
643 struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i);
645 if (ipa_is_param_used_by_indirect_call (callee_pi, i)
646 || ipa_is_param_used_by_ipa_predicates (callee_pi, i))
648 /* Determine if we know constant value of the parameter. */
649 tree type = ipa_get_type (callee_pi, i);
650 tree cst = ipa_value_from_jfunc (caller_parms_info, jf, type);
652 if (!cst && e->call_stmt
653 && i < (int)gimple_call_num_args (e->call_stmt))
655 cst = gimple_call_arg (e->call_stmt, i);
656 if (!is_gimple_min_invariant (cst))
657 cst = NULL;
659 if (cst)
661 gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO);
662 if (!avals->m_known_vals.length ())
663 avals->m_known_vals.safe_grow_cleared (count, true);
664 avals->m_known_vals[i] = cst;
666 else if (inline_p && !es->param[i].change_prob)
668 if (!avals->m_known_vals.length ())
669 avals->m_known_vals.safe_grow_cleared (count, true);
670 avals->m_known_vals[i] = error_mark_node;
673 /* If we failed to get simple constant, try value range. */
674 if ((!cst || TREE_CODE (cst) != INTEGER_CST)
675 && vrp_will_run_p (caller)
676 && ipa_is_param_used_by_ipa_predicates (callee_pi, i))
678 value_range vr (type);
680 ipa_value_range_from_jfunc (vr, caller_parms_info, e, jf, type);
681 if (!vr.undefined_p () && !vr.varying_p ())
683 if (!avals->m_known_value_ranges.length ())
685 avals->m_known_value_ranges.safe_grow_cleared (count,
686 true);
687 for (int i = 0; i < count; ++i)
688 avals->m_known_value_ranges[i].set_type (void_type_node);
690 avals->m_known_value_ranges[i] = vr;
694 /* Determine known aggregate values. */
695 if (fre_will_run_p (caller))
696 ipa_push_agg_values_from_jfunc (caller_parms_info,
697 caller, &jf->agg, i,
698 &avals->m_known_aggs);
701 /* For calls used in polymorphic calls we further determine
702 polymorphic call context. */
703 if (compute_contexts
704 && ipa_is_param_used_by_polymorphic_call (callee_pi, i))
706 ipa_polymorphic_call_context
707 ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
708 if (!ctx.useless_p ())
710 if (!avals->m_known_contexts.length ())
711 avals->m_known_contexts.safe_grow_cleared (count, true);
712 avals->m_known_contexts[i]
713 = ipa_context_from_jfunc (caller_parms_info, e, i, jf);
717 else
718 gcc_assert (!count || callee->thunk);
720 else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds)
722 int i, count = (int)gimple_call_num_args (e->call_stmt);
724 for (i = 0; i < count; i++)
726 tree cst = gimple_call_arg (e->call_stmt, i);
727 if (!is_gimple_min_invariant (cst))
728 cst = NULL;
729 if (cst)
731 if (!avals->m_known_vals.length ())
732 avals->m_known_vals.safe_grow_cleared (count, true);
733 avals->m_known_vals[i] = cst;
738 evaluate_conditions_for_known_args (callee, inline_p, avals, clause_ptr,
739 nonspec_clause_ptr, es);
743 /* Allocate the function summary. */
745 static void
746 ipa_fn_summary_alloc (void)
748 gcc_checking_assert (!ipa_fn_summaries);
749 ipa_size_summaries = new ipa_size_summary_t (symtab);
750 ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
751 ipa_call_summaries = new ipa_call_summary_t (symtab);
754 ipa_call_summary::~ipa_call_summary ()
756 if (predicate)
757 edge_predicate_pool.remove (predicate);
759 param.release ();
762 ipa_fn_summary::~ipa_fn_summary ()
764 unsigned len = vec_safe_length (loop_iterations);
765 for (unsigned i = 0; i < len; i++)
766 edge_predicate_pool.remove ((*loop_iterations)[i].predicate);
767 len = vec_safe_length (loop_strides);
768 for (unsigned i = 0; i < len; i++)
769 edge_predicate_pool.remove ((*loop_strides)[i].predicate);
770 vec_free (conds);
771 call_size_time_table.release ();
772 vec_free (loop_iterations);
773 vec_free (loop_strides);
774 builtin_constant_p_parms.release ();
777 void
778 ipa_fn_summary_t::remove_callees (cgraph_node *node)
780 cgraph_edge *e;
781 for (e = node->callees; e; e = e->next_callee)
782 ipa_call_summaries->remove (e);
783 for (e = node->indirect_calls; e; e = e->next_callee)
784 ipa_call_summaries->remove (e);
787 /* Duplicate predicates in loop hint vector, allocating memory for them and
788 remove and deallocate any uninteresting (true or false) ones. Return the
789 result. */
791 static vec<ipa_freqcounting_predicate, va_gc> *
792 remap_freqcounting_preds_after_dup (vec<ipa_freqcounting_predicate, va_gc> *v,
793 clause_t possible_truths)
795 if (vec_safe_length (v) == 0)
796 return NULL;
798 vec<ipa_freqcounting_predicate, va_gc> *res = v->copy ();
799 int len = res->length();
800 for (int i = len - 1; i >= 0; i--)
802 ipa_predicate new_predicate
803 = (*res)[i].predicate->remap_after_duplication (possible_truths);
804 /* We do not want to free previous predicate; it is used by node
805 origin. */
806 (*res)[i].predicate = NULL;
807 set_hint_predicate (&(*res)[i].predicate, new_predicate);
809 if (!(*res)[i].predicate)
810 res->unordered_remove (i);
813 return res;
817 /* Hook that is called by cgraph.cc when a node is duplicated. */
818 void
819 ipa_fn_summary_t::duplicate (cgraph_node *src,
820 cgraph_node *dst,
821 ipa_fn_summary *src_info,
822 ipa_fn_summary *info)
824 new (info) ipa_fn_summary (*src_info);
825 /* TODO: as an optimization, we may avoid copying conditions
826 that are known to be false or true. */
827 info->conds = vec_safe_copy (info->conds);
829 clone_info *cinfo = clone_info::get (dst);
830 /* When there are any replacements in the function body, see if we can figure
831 out that something was optimized out. */
832 if (ipa_node_params_sum && cinfo && cinfo->tree_map)
834 /* Use SRC parm info since it may not be copied yet. */
835 ipa_node_params *parms_info = ipa_node_params_sum->get (src);
836 ipa_auto_call_arg_values avals;
837 int count = ipa_get_param_count (parms_info);
838 int i, j;
839 clause_t possible_truths;
840 ipa_predicate true_pred = true;
841 size_time_entry *e;
842 int optimized_out_size = 0;
843 bool inlined_to_p = false;
844 struct cgraph_edge *edge, *next;
846 info->size_time_table.release ();
847 avals.m_known_vals.safe_grow_cleared (count, true);
848 for (i = 0; i < count; i++)
850 struct ipa_replace_map *r;
852 for (j = 0; vec_safe_iterate (cinfo->tree_map, j, &r); j++)
854 if (r->parm_num == i)
856 avals.m_known_vals[i] = r->new_tree;
857 break;
861 evaluate_conditions_for_known_args (dst, false,
862 &avals,
863 &possible_truths,
864 /* We are going to specialize,
865 so ignore nonspec truths. */
866 NULL,
867 NULL);
869 info->account_size_time (0, 0, true_pred, true_pred);
871 /* Remap size_time vectors.
872 Simplify the predicate by pruning out alternatives that are known
873 to be false.
874 TODO: as on optimization, we can also eliminate conditions known
875 to be true. */
876 for (i = 0; src_info->size_time_table.iterate (i, &e); i++)
878 ipa_predicate new_exec_pred;
879 ipa_predicate new_nonconst_pred;
880 new_exec_pred = e->exec_predicate.remap_after_duplication
881 (possible_truths);
882 new_nonconst_pred = e->nonconst_predicate.remap_after_duplication
883 (possible_truths);
884 if (new_exec_pred == false || new_nonconst_pred == false)
885 optimized_out_size += e->size;
886 else
887 info->account_size_time (e->size, e->time, new_exec_pred,
888 new_nonconst_pred);
891 /* Remap edge predicates with the same simplification as above.
892 Also copy constantness arrays. */
893 for (edge = dst->callees; edge; edge = next)
895 ipa_predicate new_predicate;
896 class ipa_call_summary *es = ipa_call_summaries->get (edge);
897 next = edge->next_callee;
899 if (!edge->inline_failed)
900 inlined_to_p = true;
901 if (!es->predicate)
902 continue;
903 new_predicate = es->predicate->remap_after_duplication
904 (possible_truths);
905 if (new_predicate == false && *es->predicate != false)
906 optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale;
907 edge_set_predicate (edge, &new_predicate);
910 /* Remap indirect edge predicates with the same simplification as above.
911 Also copy constantness arrays. */
912 for (edge = dst->indirect_calls; edge; edge = next)
914 ipa_predicate new_predicate;
915 class ipa_call_summary *es = ipa_call_summaries->get (edge);
916 next = edge->next_callee;
918 gcc_checking_assert (edge->inline_failed);
919 if (!es->predicate)
920 continue;
921 new_predicate = es->predicate->remap_after_duplication
922 (possible_truths);
923 if (new_predicate == false && *es->predicate != false)
924 optimized_out_size
925 += es->call_stmt_size * ipa_fn_summary::size_scale;
926 edge_set_predicate (edge, &new_predicate);
928 info->loop_iterations
929 = remap_freqcounting_preds_after_dup (info->loop_iterations,
930 possible_truths);
931 info->loop_strides
932 = remap_freqcounting_preds_after_dup (info->loop_strides,
933 possible_truths);
934 if (info->builtin_constant_p_parms.length())
936 vec <int, va_heap, vl_ptr> parms = info->builtin_constant_p_parms;
937 int ip;
938 info->builtin_constant_p_parms = vNULL;
939 for (i = 0; parms.iterate (i, &ip); i++)
940 if (!avals.m_known_vals[ip])
941 info->builtin_constant_p_parms.safe_push (ip);
944 /* If inliner or someone after inliner will ever start producing
945 non-trivial clones, we will get trouble with lack of information
946 about updating self sizes, because size vectors already contains
947 sizes of the callees. */
948 gcc_assert (!inlined_to_p || !optimized_out_size);
950 else
952 info->size_time_table = src_info->size_time_table.copy ();
953 info->loop_iterations = vec_safe_copy (src_info->loop_iterations);
954 info->loop_strides = vec_safe_copy (info->loop_strides);
956 info->builtin_constant_p_parms
957 = info->builtin_constant_p_parms.copy ();
959 ipa_freqcounting_predicate *f;
960 for (int i = 0; vec_safe_iterate (info->loop_iterations, i, &f); i++)
962 ipa_predicate p = *f->predicate;
963 f->predicate = NULL;
964 set_hint_predicate (&f->predicate, p);
966 for (int i = 0; vec_safe_iterate (info->loop_strides, i, &f); i++)
968 ipa_predicate p = *f->predicate;
969 f->predicate = NULL;
970 set_hint_predicate (&f->predicate, p);
973 if (!dst->inlined_to)
974 ipa_update_overall_fn_summary (dst);
978 /* Hook that is called by cgraph.cc when a node is duplicated. */
980 void
981 ipa_call_summary_t::duplicate (struct cgraph_edge *src,
982 struct cgraph_edge *dst,
983 class ipa_call_summary *srcinfo,
984 class ipa_call_summary *info)
986 new (info) ipa_call_summary (*srcinfo);
987 info->predicate = NULL;
988 edge_set_predicate (dst, srcinfo->predicate);
989 info->param = srcinfo->param.copy ();
990 if (!dst->indirect_unknown_callee && src->indirect_unknown_callee)
992 info->call_stmt_size -= (eni_size_weights.indirect_call_cost
993 - eni_size_weights.call_cost);
994 info->call_stmt_time -= (eni_time_weights.indirect_call_cost
995 - eni_time_weights.call_cost);
999 /* Dump edge summaries associated to NODE and recursively to all clones.
1000 Indent by INDENT. */
1002 static void
1003 dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node,
1004 class ipa_fn_summary *info)
1006 struct cgraph_edge *edge;
1007 for (edge = node->callees; edge; edge = edge->next_callee)
1009 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1010 struct cgraph_node *callee = edge->callee->ultimate_alias_target ();
1011 int i;
1013 fprintf (f,
1014 "%*s%s %s\n%*s freq:%4.2f",
1015 indent, "", callee->dump_name (),
1016 !edge->inline_failed
1017 ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed),
1018 indent, "", edge->sreal_frequency ().to_double ());
1020 if (cross_module_call_p (edge))
1021 fprintf (f, " cross module");
1023 if (es)
1024 fprintf (f, " loop depth:%2i size:%2i time: %2i",
1025 es->loop_depth, es->call_stmt_size, es->call_stmt_time);
1027 ipa_fn_summary *s = ipa_fn_summaries->get (callee);
1028 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1029 if (s != NULL)
1030 fprintf (f, " callee size:%2i stack:%2i",
1031 (int) (ss->size / ipa_fn_summary::size_scale),
1032 (int) s->estimated_stack_size);
1034 if (es && es->predicate)
1036 fprintf (f, " predicate: ");
1037 es->predicate->dump (f, info->conds);
1039 else
1040 fprintf (f, "\n");
1041 if (es && es->param.exists ())
1042 for (i = 0; i < (int) es->param.length (); i++)
1044 int prob = es->param[i].change_prob;
1046 if (!prob)
1047 fprintf (f, "%*s op%i is compile time invariant\n",
1048 indent + 2, "", i);
1049 else if (prob != REG_BR_PROB_BASE)
1050 fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i,
1051 prob * 100.0 / REG_BR_PROB_BASE);
1052 if (es->param[i].points_to_local_or_readonly_memory)
1053 fprintf (f, "%*s op%i points to local or readonly memory\n",
1054 indent + 2, "", i);
1055 if (es->param[i].points_to_possible_sra_candidate)
1056 fprintf (f, "%*s op%i points to possible sra candidate\n",
1057 indent + 2, "", i);
1059 if (!edge->inline_failed)
1061 ipa_size_summary *ss = ipa_size_summaries->get (callee);
1062 fprintf (f, "%*sStack frame offset %i, callee self size %i\n",
1063 indent + 2, "",
1064 (int) ipa_get_stack_frame_offset (callee),
1065 (int) ss->estimated_self_stack_size);
1066 dump_ipa_call_summary (f, indent + 2, callee, info);
1069 for (edge = node->indirect_calls; edge; edge = edge->next_callee)
1071 class ipa_call_summary *es = ipa_call_summaries->get (edge);
1072 fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i"
1073 " time: %2i",
1074 indent, "",
1075 es->loop_depth,
1076 edge->sreal_frequency ().to_double (), es->call_stmt_size,
1077 es->call_stmt_time);
1078 if (es->predicate)
1080 fprintf (f, "predicate: ");
1081 es->predicate->dump (f, info->conds);
1083 else
1084 fprintf (f, "\n");
1089 void
1090 ipa_dump_fn_summary (FILE *f, struct cgraph_node *node)
1092 if (node->definition)
1094 class ipa_fn_summary *s = ipa_fn_summaries->get (node);
1095 class ipa_size_summary *ss = ipa_size_summaries->get (node);
1096 if (s != NULL)
1098 size_time_entry *e;
1099 int i;
1100 fprintf (f, "IPA function summary for %s", node->dump_name ());
1101 if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
1102 fprintf (f, " always_inline");
1103 if (s->inlinable)
1104 fprintf (f, " inlinable");
1105 if (s->fp_expressions)
1106 fprintf (f, " fp_expression");
1107 if (s->builtin_constant_p_parms.length ())
1109 fprintf (f, " builtin_constant_p_parms");
1110 for (unsigned int i = 0;
1111 i < s->builtin_constant_p_parms.length (); i++)
1112 fprintf (f, " %i", s->builtin_constant_p_parms[i]);
1114 fprintf (f, "\n global time: %f\n", s->time.to_double ());
1115 fprintf (f, " self size: %i\n", ss->self_size);
1116 fprintf (f, " global size: %i\n", ss->size);
1117 fprintf (f, " min size: %i\n", s->min_size);
1118 fprintf (f, " self stack: %i\n",
1119 (int) ss->estimated_self_stack_size);
1120 fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size);
1121 if (s->growth)
1122 fprintf (f, " estimated growth:%i\n", (int) s->growth);
1123 if (s->scc_no)
1124 fprintf (f, " In SCC: %i\n", (int) s->scc_no);
1125 for (i = 0; s->size_time_table.iterate (i, &e); i++)
1127 fprintf (f, " size:%f, time:%f",
1128 (double) e->size / ipa_fn_summary::size_scale,
1129 e->time.to_double ());
1130 if (e->exec_predicate != true)
1132 fprintf (f, ", executed if:");
1133 e->exec_predicate.dump (f, s->conds, 0);
1135 if (e->exec_predicate != e->nonconst_predicate)
1137 fprintf (f, ", nonconst if:");
1138 e->nonconst_predicate.dump (f, s->conds, 0);
1140 fprintf (f, "\n");
1142 ipa_freqcounting_predicate *fcp;
1143 bool first_fcp = true;
1144 for (int i = 0; vec_safe_iterate (s->loop_iterations, i, &fcp); i++)
1146 if (first_fcp)
1148 fprintf (f, " loop iterations:");
1149 first_fcp = false;
1151 fprintf (f, " %3.2f for ", fcp->freq.to_double ());
1152 fcp->predicate->dump (f, s->conds);
1154 first_fcp = true;
1155 for (int i = 0; vec_safe_iterate (s->loop_strides, i, &fcp); i++)
1157 if (first_fcp)
1159 fprintf (f, " loop strides:");
1160 first_fcp = false;
1162 fprintf (f, " %3.2f for :", fcp->freq.to_double ());
1163 fcp->predicate->dump (f, s->conds);
1165 fprintf (f, " calls:\n");
1166 dump_ipa_call_summary (f, 4, node, s);
1167 fprintf (f, "\n");
1168 if (s->target_info)
1169 fprintf (f, " target_info: %x\n", s->target_info);
1171 else
1172 fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ());
1176 DEBUG_FUNCTION void
1177 ipa_debug_fn_summary (struct cgraph_node *node)
1179 ipa_dump_fn_summary (stderr, node);
1182 void
1183 ipa_dump_fn_summaries (FILE *f)
1185 struct cgraph_node *node;
1187 FOR_EACH_DEFINED_FUNCTION (node)
1188 if (!node->inlined_to)
1189 ipa_dump_fn_summary (f, node);
1192 /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the
1193 boolean variable pointed to by DATA. */
1195 static bool
1196 mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED,
1197 void *data)
1199 bool *b = (bool *) data;
1200 *b = true;
1201 return true;
1204 /* If OP refers to value of function parameter, return the corresponding
1205 parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the
1206 PARM_DECL) will be stored to *SIZE_P in that case too. */
1208 static tree
1209 unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op,
1210 poly_int64 *size_p)
1212 /* SSA_NAME referring to parm default def? */
1213 if (TREE_CODE (op) == SSA_NAME
1214 && SSA_NAME_IS_DEFAULT_DEF (op)
1215 && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
1217 if (size_p)
1218 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1219 return SSA_NAME_VAR (op);
1221 /* Non-SSA parm reference? */
1222 if (TREE_CODE (op) == PARM_DECL
1223 && fbi->aa_walk_budget > 0)
1225 bool modified = false;
1227 ao_ref refd;
1228 ao_ref_init (&refd, op);
1229 int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt),
1230 mark_modified, &modified, NULL, NULL,
1231 fbi->aa_walk_budget);
1232 if (walked < 0)
1234 fbi->aa_walk_budget = 0;
1235 return NULL_TREE;
1237 fbi->aa_walk_budget -= walked;
1238 if (!modified)
1240 if (size_p)
1241 *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op)));
1242 return op;
1245 return NULL_TREE;
1248 /* If OP refers to value of function parameter, return the corresponding
1249 parameter. Also traverse chains of SSA register assignments. If non-NULL,
1250 the size of the memory load (or the SSA_NAME of the PARM_DECL) will be
1251 stored to *SIZE_P in that case too. */
1253 static tree
1254 unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op,
1255 poly_int64 *size_p)
1257 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1258 if (res)
1259 return res;
1261 if (TREE_CODE (op) == SSA_NAME
1262 && !SSA_NAME_IS_DEFAULT_DEF (op)
1263 && gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1264 return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op),
1265 gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)),
1266 size_p);
1267 return NULL_TREE;
1270 /* If OP refers to a value of a function parameter or value loaded from an
1271 aggregate passed to a parameter (either by value or reference), return TRUE
1272 and store the number of the parameter to *INDEX_P, the access size into
1273 *SIZE_P, and information whether and how it has been loaded from an
1274 aggregate into *AGGPOS. INFO describes the function parameters, STMT is the
1275 statement in which OP is used or loaded. */
1277 static bool
1278 unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi,
1279 gimple *stmt, tree op, int *index_p,
1280 poly_int64 *size_p,
1281 struct agg_position_info *aggpos)
1283 tree res = unmodified_parm_1 (fbi, stmt, op, size_p);
1285 gcc_checking_assert (aggpos);
1286 if (res)
1288 *index_p = ipa_get_param_decl_index (fbi->info, res);
1289 if (*index_p < 0)
1290 return false;
1291 aggpos->agg_contents = false;
1292 aggpos->by_ref = false;
1293 return true;
1296 if (TREE_CODE (op) == SSA_NAME)
1298 if (SSA_NAME_IS_DEFAULT_DEF (op)
1299 || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op)))
1300 return false;
1301 stmt = SSA_NAME_DEF_STMT (op);
1302 op = gimple_assign_rhs1 (stmt);
1303 if (!REFERENCE_CLASS_P (op))
1304 return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p,
1305 aggpos);
1308 aggpos->agg_contents = true;
1309 return ipa_load_from_parm_agg (fbi, fbi->info->descriptors,
1310 stmt, op, index_p, &aggpos->offset,
1311 size_p, &aggpos->by_ref);
1314 /* If stmt is simple load or store of value pointed to by a function parmaeter,
1315 return its index. */
1317 static int
1318 load_or_store_of_ptr_parameter (ipa_func_body_info *fbi, gimple *stmt)
1320 if (!optimize)
1321 return -1;
1322 gassign *assign = dyn_cast <gassign *> (stmt);
1323 if (!assign)
1324 return -1;
1325 tree param;
1326 if (gimple_assign_load_p (stmt))
1327 param = gimple_assign_rhs1 (stmt);
1328 else if (gimple_store_p (stmt))
1329 param = gimple_assign_lhs (stmt);
1330 else
1331 return -1;
1332 tree base = get_base_address (param);
1333 if (TREE_CODE (base) != MEM_REF
1334 || TREE_CODE (TREE_OPERAND (base, 0)) != SSA_NAME
1335 || !SSA_NAME_IS_DEFAULT_DEF (TREE_OPERAND (base, 0)))
1336 return -1;
1337 tree p = SSA_NAME_VAR (TREE_OPERAND (base, 0));
1338 if (TREE_CODE (p) != PARM_DECL)
1339 return -1;
1340 return ipa_get_param_decl_index (fbi->info, p);
1343 /* See if statement might disappear after inlining.
1344 0 - means not eliminated
1345 1 - half of statements goes away
1346 2 - for sure it is eliminated.
1347 We are not terribly sophisticated, basically looking for simple abstraction
1348 penalty wrappers. */
1350 static int
1351 eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt)
1353 enum gimple_code code = gimple_code (stmt);
1354 enum tree_code rhs_code;
1356 if (!optimize)
1357 return 0;
1359 switch (code)
1361 case GIMPLE_RETURN:
1362 return 2;
1363 case GIMPLE_ASSIGN:
1364 if (gimple_num_ops (stmt) != 2)
1365 return 0;
1367 rhs_code = gimple_assign_rhs_code (stmt);
1369 /* Casts of parameters, loads from parameters passed by reference
1370 and stores to return value or parameters are often free after
1371 inlining due to SRA and further combining.
1372 Assume that half of statements goes away. */
1373 if (CONVERT_EXPR_CODE_P (rhs_code)
1374 || rhs_code == VIEW_CONVERT_EXPR
1375 || rhs_code == ADDR_EXPR
1376 || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS)
1378 tree rhs = gimple_assign_rhs1 (stmt);
1379 tree lhs = gimple_assign_lhs (stmt);
1380 tree inner_rhs = get_base_address (rhs);
1381 tree inner_lhs = get_base_address (lhs);
1382 bool rhs_free = false;
1383 bool lhs_free = false;
1385 if (!inner_rhs)
1386 inner_rhs = rhs;
1387 if (!inner_lhs)
1388 inner_lhs = lhs;
1390 /* Reads of parameter are expected to be free. */
1391 if (unmodified_parm (fbi, stmt, inner_rhs, NULL))
1392 rhs_free = true;
1393 /* Match expressions of form &this->field. Those will most likely
1394 combine with something upstream after inlining. */
1395 else if (TREE_CODE (inner_rhs) == ADDR_EXPR)
1397 tree op = get_base_address (TREE_OPERAND (inner_rhs, 0));
1398 if (TREE_CODE (op) == PARM_DECL)
1399 rhs_free = true;
1400 else if (TREE_CODE (op) == MEM_REF
1401 && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0),
1402 NULL))
1403 rhs_free = true;
1406 /* When parameter is not SSA register because its address is taken
1407 and it is just copied into one, the statement will be completely
1408 free after inlining (we will copy propagate backward). */
1409 if (rhs_free && is_gimple_reg (lhs))
1410 return 2;
1412 /* Reads of parameters passed by reference
1413 expected to be free (i.e. optimized out after inlining). */
1414 if (TREE_CODE (inner_rhs) == MEM_REF
1415 && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL))
1416 rhs_free = true;
1418 /* Copying parameter passed by reference into gimple register is
1419 probably also going to copy propagate, but we can't be quite
1420 sure. */
1421 if (rhs_free && is_gimple_reg (lhs))
1422 lhs_free = true;
1424 /* Writes to parameters, parameters passed by value and return value
1425 (either directly or passed via invisible reference) are free.
1427 TODO: We ought to handle testcase like
1428 struct a {int a,b;};
1429 struct a
1430 returnstruct (void)
1432 struct a a ={1,2};
1433 return a;
1436 This translate into:
1438 returnstruct ()
1440 int a$b;
1441 int a$a;
1442 struct a a;
1443 struct a D.2739;
1445 <bb 2>:
1446 D.2739.a = 1;
1447 D.2739.b = 2;
1448 return D.2739;
1451 For that we either need to copy ipa-split logic detecting writes
1452 to return value. */
1453 if (TREE_CODE (inner_lhs) == PARM_DECL
1454 || TREE_CODE (inner_lhs) == RESULT_DECL
1455 || (TREE_CODE (inner_lhs) == MEM_REF
1456 && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0),
1457 NULL)
1458 || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME
1459 && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0))
1460 && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND
1461 (inner_lhs,
1462 0))) == RESULT_DECL))))
1463 lhs_free = true;
1464 if (lhs_free
1465 && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs)))
1466 rhs_free = true;
1467 if (lhs_free && rhs_free)
1468 return 1;
1470 return 0;
1471 default:
1472 return 0;
1476 /* Analyze EXPR if it represents a series of simple operations performed on
1477 a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and
1478 AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item.
1479 Type of the parameter or load from an aggregate via the parameter is
1480 stored in *TYPE_P. Operations on the parameter are recorded to
1481 PARAM_OPS_P if it is not NULL. */
1483 static bool
1484 decompose_param_expr (struct ipa_func_body_info *fbi,
1485 gimple *stmt, tree expr,
1486 int *index_p, tree *type_p,
1487 struct agg_position_info *aggpos,
1488 expr_eval_ops *param_ops_p = NULL)
1490 int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops);
1491 int op_count = 0;
1493 if (param_ops_p)
1494 *param_ops_p = NULL;
1496 while (true)
1498 expr_eval_op eval_op;
1499 unsigned rhs_count;
1500 unsigned cst_count = 0;
1502 if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL,
1503 aggpos))
1505 tree type = TREE_TYPE (expr);
1507 if (aggpos->agg_contents)
1509 /* Stop if containing bit-field. */
1510 if (TREE_CODE (expr) == BIT_FIELD_REF
1511 || contains_bitfld_component_ref_p (expr))
1512 break;
1515 *type_p = type;
1516 return true;
1519 if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr))
1520 break;
1521 stmt = SSA_NAME_DEF_STMT (expr);
1523 if (gcall *call = dyn_cast <gcall *> (stmt))
1525 int flags = gimple_call_return_flags (call);
1526 if (!(flags & ERF_RETURNS_ARG))
1527 goto fail;
1528 int arg = flags & ERF_RETURN_ARG_MASK;
1529 if (arg >= (int)gimple_call_num_args (call))
1530 goto fail;
1531 expr = gimple_call_arg (stmt, arg);
1532 continue;
1535 if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr)))
1536 break;
1538 switch (gimple_assign_rhs_class (stmt))
1540 case GIMPLE_SINGLE_RHS:
1541 expr = gimple_assign_rhs1 (stmt);
1542 continue;
1544 case GIMPLE_UNARY_RHS:
1545 rhs_count = 1;
1546 break;
1548 case GIMPLE_BINARY_RHS:
1549 rhs_count = 2;
1550 break;
1552 case GIMPLE_TERNARY_RHS:
1553 rhs_count = 3;
1554 break;
1556 default:
1557 goto fail;
1560 /* Stop if expression is too complex. */
1561 if (op_count++ == op_limit)
1562 break;
1564 if (param_ops_p)
1566 eval_op.code = gimple_assign_rhs_code (stmt);
1567 eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt));
1568 eval_op.val[0] = NULL_TREE;
1569 eval_op.val[1] = NULL_TREE;
1572 expr = NULL_TREE;
1573 for (unsigned i = 0; i < rhs_count; i++)
1575 tree op = gimple_op (stmt, i + 1);
1577 gcc_assert (op && !TYPE_P (op));
1578 if (is_gimple_ip_invariant (op))
1580 if (++cst_count == rhs_count)
1581 goto fail;
1583 eval_op.val[cst_count - 1] = op;
1585 else if (!expr)
1587 /* Found a non-constant operand, and record its index in rhs
1588 operands. */
1589 eval_op.index = i;
1590 expr = op;
1592 else
1594 /* Found more than one non-constant operands. */
1595 goto fail;
1599 if (param_ops_p)
1600 vec_safe_insert (*param_ops_p, 0, eval_op);
1603 /* Failed to decompose, free resource and return. */
1604 fail:
1605 if (param_ops_p)
1606 vec_free (*param_ops_p);
1608 return false;
1611 /* Record to SUMMARY that PARM is used by builtin_constant_p. */
1613 static void
1614 add_builtin_constant_p_parm (class ipa_fn_summary *summary, int parm)
1616 int ip;
1618 /* Avoid duplicates. */
1619 for (unsigned int i = 0;
1620 summary->builtin_constant_p_parms.iterate (i, &ip); i++)
1621 if (ip == parm)
1622 return;
1623 summary->builtin_constant_p_parms.safe_push (parm);
1626 /* If BB ends by a conditional we can turn into predicates, attach corresponding
1627 predicates to the CFG edges. */
1629 static void
1630 set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1631 class ipa_fn_summary *summary,
1632 class ipa_node_params *params_summary,
1633 basic_block bb)
1635 tree op, op2;
1636 int index;
1637 struct agg_position_info aggpos;
1638 enum tree_code code, inverted_code;
1639 edge e;
1640 edge_iterator ei;
1641 gimple *set_stmt;
1642 tree param_type;
1643 expr_eval_ops param_ops;
1645 gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
1646 if (!last)
1647 return;
1648 if (!is_gimple_ip_invariant (gimple_cond_rhs (last)))
1649 return;
1650 op = gimple_cond_lhs (last);
1652 if (decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1653 &param_ops))
1655 code = gimple_cond_code (last);
1656 inverted_code = invert_tree_comparison (code, HONOR_NANS (op));
1658 FOR_EACH_EDGE (e, ei, bb->succs)
1660 enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE
1661 ? code : inverted_code);
1662 /* invert_tree_comparison will return ERROR_MARK on FP
1663 comparisons that are not EQ/NE instead of returning proper
1664 unordered one. Be sure it is not confused with NON_CONSTANT.
1666 And if the edge's target is the final block of diamond CFG graph
1667 of this conditional statement, we do not need to compute
1668 predicate for the edge because the final block's predicate must
1669 be at least as that of the first block of the statement. */
1670 if (this_code != ERROR_MARK
1671 && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1673 ipa_predicate p
1674 = add_condition (summary, params_summary, index,
1675 param_type, &aggpos,
1676 this_code, gimple_cond_rhs (last), param_ops);
1677 e->aux = edge_predicate_pool.allocate ();
1678 *(ipa_predicate *) e->aux = p;
1681 vec_free (param_ops);
1682 return;
1685 if (TREE_CODE (op) != SSA_NAME)
1686 return;
1687 /* Special case
1688 if (builtin_constant_p (op))
1689 constant_code
1690 else
1691 nonconstant_code.
1692 Here we can predicate nonconstant_code. We can't
1693 really handle constant_code since we have no predicate
1694 for this and also the constant code is not known to be
1695 optimized away when inliner doesn't see operand is constant.
1696 Other optimizers might think otherwise. */
1697 if (gimple_cond_code (last) != NE_EXPR
1698 || !integer_zerop (gimple_cond_rhs (last)))
1699 return;
1700 set_stmt = SSA_NAME_DEF_STMT (op);
1701 if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P)
1702 || gimple_call_num_args (set_stmt) != 1)
1703 return;
1704 op2 = gimple_call_arg (set_stmt, 0);
1705 if (!decompose_param_expr (fbi, set_stmt, op2, &index, &param_type, &aggpos))
1706 return;
1707 if (!aggpos.by_ref)
1708 add_builtin_constant_p_parm (summary, index);
1709 FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE)
1711 ipa_predicate p = add_condition (summary, params_summary, index,
1712 param_type, &aggpos,
1713 ipa_predicate::is_not_constant, NULL_TREE);
1714 e->aux = edge_predicate_pool.allocate ();
1715 *(ipa_predicate *) e->aux = p;
1720 /* If BB ends by a switch we can turn into predicates, attach corresponding
1721 predicates to the CFG edges. */
1723 static void
1724 set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi,
1725 class ipa_fn_summary *summary,
1726 class ipa_node_params *params_summary,
1727 basic_block bb)
1729 tree op;
1730 int index;
1731 struct agg_position_info aggpos;
1732 edge e;
1733 edge_iterator ei;
1734 size_t n;
1735 size_t case_idx;
1736 tree param_type;
1737 expr_eval_ops param_ops;
1739 gswitch *last = safe_dyn_cast <gswitch *> (*gsi_last_bb (bb));
1740 if (!last)
1741 return;
1742 op = gimple_switch_index (last);
1743 if (!decompose_param_expr (fbi, last, op, &index, &param_type, &aggpos,
1744 &param_ops))
1745 return;
1747 auto_vec<std::pair<tree, tree> > ranges;
1748 tree type = TREE_TYPE (op);
1749 int bound_limit = opt_for_fn (fbi->node->decl,
1750 param_ipa_max_switch_predicate_bounds);
1751 int bound_count = 0;
1752 // This can safely be an integer range, as switches can only hold
1753 // integers.
1754 int_range<2> vr;
1756 get_range_query (cfun)->range_of_expr (vr, op);
1757 if (vr.undefined_p ())
1758 vr.set_varying (TREE_TYPE (op));
1759 tree vr_min, vr_max;
1760 // TODO: This entire function could use a rewrite to use the irange
1761 // API, instead of trying to recreate its intersection/union logic.
1762 // Any use of get_legacy_range() is a serious code smell.
1763 value_range_kind vr_type = get_legacy_range (vr, vr_min, vr_max);
1764 wide_int vr_wmin = wi::to_wide (vr_min);
1765 wide_int vr_wmax = wi::to_wide (vr_max);
1767 FOR_EACH_EDGE (e, ei, bb->succs)
1769 e->aux = edge_predicate_pool.allocate ();
1770 *(ipa_predicate *) e->aux = false;
1773 e = gimple_switch_edge (cfun, last, 0);
1774 /* Set BOUND_COUNT to maximum count to bypass computing predicate for
1775 default case if its target basic block is in convergence point of all
1776 switch cases, which can be determined by checking whether it
1777 post-dominates the switch statement. */
1778 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1779 bound_count = INT_MAX;
1781 n = gimple_switch_num_labels (last);
1782 for (case_idx = 1; case_idx < n; ++case_idx)
1784 tree cl = gimple_switch_label (last, case_idx);
1785 tree min = CASE_LOW (cl);
1786 tree max = CASE_HIGH (cl);
1787 ipa_predicate p;
1789 e = gimple_switch_edge (cfun, last, case_idx);
1791 /* The case value might not have same type as switch expression,
1792 extend the value based on the expression type. */
1793 if (TREE_TYPE (min) != type)
1794 min = wide_int_to_tree (type, wi::to_wide (min));
1796 if (!max)
1797 max = min;
1798 else if (TREE_TYPE (max) != type)
1799 max = wide_int_to_tree (type, wi::to_wide (max));
1801 /* The case's target basic block is in convergence point of all switch
1802 cases, its predicate should be at least as that of the switch
1803 statement. */
1804 if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest))
1805 p = true;
1806 else if (min == max)
1807 p = add_condition (summary, params_summary, index, param_type,
1808 &aggpos, EQ_EXPR, min, param_ops);
1809 else
1811 ipa_predicate p1, p2;
1812 p1 = add_condition (summary, params_summary, index, param_type,
1813 &aggpos, GE_EXPR, min, param_ops);
1814 p2 = add_condition (summary, params_summary,index, param_type,
1815 &aggpos, LE_EXPR, max, param_ops);
1816 p = p1 & p2;
1818 *(ipa_predicate *) e->aux
1819 = p.or_with (summary->conds, *(ipa_predicate *) e->aux);
1821 /* If there are too many disjoint case ranges, predicate for default
1822 case might become too complicated. So add a limit here. */
1823 if (bound_count > bound_limit)
1824 continue;
1826 bool new_range = true;
1828 if (!ranges.is_empty ())
1830 wide_int curr_wmin = wi::to_wide (min);
1831 wide_int last_wmax = wi::to_wide (ranges.last ().second);
1833 /* Merge case ranges if they are continuous. */
1834 if (curr_wmin == last_wmax + 1)
1835 new_range = false;
1836 else if (vr_type == VR_ANTI_RANGE)
1838 /* If two disjoint case ranges can be connected by anti-range
1839 of switch index, combine them to one range. */
1840 if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type)))
1841 vr_type = VR_UNDEFINED;
1842 else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type)))
1843 new_range = false;
1847 /* Create/extend a case range. And we count endpoints of range set,
1848 this number nearly equals to number of conditions that we will create
1849 for predicate of default case. */
1850 if (new_range)
1852 bound_count += (min == max) ? 1 : 2;
1853 ranges.safe_push (std::make_pair (min, max));
1855 else
1857 bound_count += (ranges.last ().first == ranges.last ().second);
1858 ranges.last ().second = max;
1862 e = gimple_switch_edge (cfun, last, 0);
1863 if (bound_count > bound_limit)
1865 *(ipa_predicate *) e->aux = true;
1866 vec_free (param_ops);
1867 return;
1870 ipa_predicate p_seg = true;
1871 ipa_predicate p_all = false;
1873 if (vr_type != VR_RANGE)
1875 vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type));
1876 vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type));
1879 /* Construct predicate to represent default range set that is negation of
1880 all case ranges. Case range is classified as containing single/non-single
1881 values. Suppose a piece of case ranges in the following.
1883 [D1...D2] [S1] ... [Sn] [D3...D4]
1885 To represent default case's range sets between two non-single value
1886 case ranges (From D2 to D3), we construct predicate as:
1888 D2 < x < D3 && x != S1 && ... && x != Sn
1890 for (size_t i = 0; i < ranges.length (); i++)
1892 tree min = ranges[i].first;
1893 tree max = ranges[i].second;
1895 if (min == max)
1896 p_seg &= add_condition (summary, params_summary, index,
1897 param_type, &aggpos, NE_EXPR,
1898 min, param_ops);
1899 else
1901 /* Do not create sub-predicate for range that is beyond low bound
1902 of switch index. */
1903 if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type)))
1905 p_seg &= add_condition (summary, params_summary, index,
1906 param_type, &aggpos,
1907 LT_EXPR, min, param_ops);
1908 p_all = p_all.or_with (summary->conds, p_seg);
1911 /* Do not create sub-predicate for range that is beyond up bound
1912 of switch index. */
1913 if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type)))
1915 p_seg = false;
1916 break;
1919 p_seg = add_condition (summary, params_summary, index,
1920 param_type, &aggpos, GT_EXPR,
1921 max, param_ops);
1925 p_all = p_all.or_with (summary->conds, p_seg);
1926 *(ipa_predicate *) e->aux
1927 = p_all.or_with (summary->conds, *(ipa_predicate *) e->aux);
1929 vec_free (param_ops);
1933 /* For each BB in NODE attach to its AUX pointer predicate under
1934 which it is executable. */
1936 static void
1937 compute_bb_predicates (struct ipa_func_body_info *fbi,
1938 struct cgraph_node *node,
1939 class ipa_fn_summary *summary,
1940 class ipa_node_params *params_summary)
1942 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
1943 bool done = false;
1944 basic_block bb;
1946 FOR_EACH_BB_FN (bb, my_function)
1948 set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb);
1949 set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb);
1952 /* Entry block is always executable. */
1953 ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux
1954 = edge_predicate_pool.allocate ();
1955 *(ipa_predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true;
1957 /* A simple dataflow propagation of predicates forward in the CFG.
1958 TODO: work in reverse postorder. */
1959 while (!done)
1961 done = true;
1962 FOR_EACH_BB_FN (bb, my_function)
1964 ipa_predicate p = false;
1965 edge e;
1966 edge_iterator ei;
1967 FOR_EACH_EDGE (e, ei, bb->preds)
1969 if (e->src->aux)
1971 ipa_predicate this_bb_predicate
1972 = *(ipa_predicate *) e->src->aux;
1973 if (e->aux)
1974 this_bb_predicate &= (*(ipa_predicate *) e->aux);
1975 p = p.or_with (summary->conds, this_bb_predicate);
1976 if (p == true)
1977 break;
1980 if (p != false)
1982 basic_block pdom_bb;
1984 if (!bb->aux)
1986 done = false;
1987 bb->aux = edge_predicate_pool.allocate ();
1988 *((ipa_predicate *) bb->aux) = p;
1990 else if (p != *(ipa_predicate *) bb->aux)
1992 /* This OR operation is needed to ensure monotonous data flow
1993 in the case we hit the limit on number of clauses and the
1994 and/or operations above give approximate answers. */
1995 p = p.or_with (summary->conds, *(ipa_predicate *)bb->aux);
1996 if (p != *(ipa_predicate *)bb->aux)
1998 done = false;
1999 *((ipa_predicate *)bb->aux) = p;
2003 /* For switch/if statement, we can OR-combine predicates of all
2004 its cases/branches to get predicate for basic block in their
2005 convergence point, but sometimes this will generate very
2006 complicated predicate. Actually, we can get simplified
2007 predicate in another way by using the fact that predicate
2008 for a basic block must also hold true for its post dominators.
2009 To be specific, basic block in convergence point of
2010 conditional statement should include predicate of the
2011 statement. */
2012 pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
2013 if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb)
2015 else if (!pdom_bb->aux)
2017 done = false;
2018 pdom_bb->aux = edge_predicate_pool.allocate ();
2019 *((ipa_predicate *)pdom_bb->aux) = p;
2021 else if (p != *(ipa_predicate *)pdom_bb->aux)
2023 p = p.or_with (summary->conds,
2024 *(ipa_predicate *)pdom_bb->aux);
2025 if (p != *(ipa_predicate *)pdom_bb->aux)
2027 done = false;
2028 *((ipa_predicate *)pdom_bb->aux) = p;
2037 /* Return predicate specifying when the STMT might have result that is not
2038 a compile time constant. */
2040 static ipa_predicate
2041 will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi,
2042 class ipa_fn_summary *summary,
2043 class ipa_node_params *params_summary,
2044 tree expr,
2045 vec<ipa_predicate> nonconstant_names)
2047 tree parm;
2048 int index;
2050 while (UNARY_CLASS_P (expr))
2051 expr = TREE_OPERAND (expr, 0);
2053 parm = unmodified_parm (fbi, NULL, expr, NULL);
2054 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2055 return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL,
2056 ipa_predicate::changed, NULL_TREE);
2057 if (is_gimple_min_invariant (expr))
2058 return false;
2059 if (TREE_CODE (expr) == SSA_NAME)
2060 return nonconstant_names[SSA_NAME_VERSION (expr)];
2061 if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr))
2063 ipa_predicate p1
2064 = will_be_nonconstant_expr_predicate (fbi, summary,
2065 params_summary,
2066 TREE_OPERAND (expr, 0),
2067 nonconstant_names);
2068 if (p1 == true)
2069 return p1;
2071 ipa_predicate p2
2072 = will_be_nonconstant_expr_predicate (fbi, summary,
2073 params_summary,
2074 TREE_OPERAND (expr, 1),
2075 nonconstant_names);
2076 return p1.or_with (summary->conds, p2);
2078 else if (TREE_CODE (expr) == COND_EXPR)
2080 ipa_predicate p1
2081 = will_be_nonconstant_expr_predicate (fbi, summary,
2082 params_summary,
2083 TREE_OPERAND (expr, 0),
2084 nonconstant_names);
2085 if (p1 == true)
2086 return p1;
2088 ipa_predicate p2
2089 = will_be_nonconstant_expr_predicate (fbi, summary,
2090 params_summary,
2091 TREE_OPERAND (expr, 1),
2092 nonconstant_names);
2093 if (p2 == true)
2094 return p2;
2095 p1 = p1.or_with (summary->conds, p2);
2096 p2 = will_be_nonconstant_expr_predicate (fbi, summary,
2097 params_summary,
2098 TREE_OPERAND (expr, 2),
2099 nonconstant_names);
2100 return p2.or_with (summary->conds, p1);
2102 else if (TREE_CODE (expr) == CALL_EXPR)
2103 return true;
2104 else
2106 debug_tree (expr);
2107 gcc_unreachable ();
2112 /* Return predicate specifying when the STMT might have result that is not
2113 a compile time constant. */
2115 static ipa_predicate
2116 will_be_nonconstant_predicate (struct ipa_func_body_info *fbi,
2117 class ipa_fn_summary *summary,
2118 class ipa_node_params *params_summary,
2119 gimple *stmt,
2120 vec<ipa_predicate> nonconstant_names)
2122 ipa_predicate p = true;
2123 ssa_op_iter iter;
2124 tree use;
2125 tree param_type = NULL_TREE;
2126 ipa_predicate op_non_const;
2127 bool is_load;
2128 int base_index;
2129 struct agg_position_info aggpos;
2131 /* What statements might be optimized away
2132 when their arguments are constant. */
2133 if (gimple_code (stmt) != GIMPLE_ASSIGN
2134 && gimple_code (stmt) != GIMPLE_COND
2135 && gimple_code (stmt) != GIMPLE_SWITCH
2136 && (gimple_code (stmt) != GIMPLE_CALL
2137 || !(gimple_call_flags (stmt) & ECF_CONST)))
2138 return p;
2140 /* Stores will stay anyway. */
2141 if (gimple_store_p (stmt))
2142 return p;
2144 is_load = gimple_assign_load_p (stmt);
2146 /* Loads can be optimized when the value is known. */
2147 if (is_load)
2149 tree op = gimple_assign_rhs1 (stmt);
2150 if (!decompose_param_expr (fbi, stmt, op, &base_index, &param_type,
2151 &aggpos))
2152 return p;
2154 else
2155 base_index = -1;
2157 /* See if we understand all operands before we start
2158 adding conditionals. */
2159 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2161 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2162 /* For arguments we can build a condition. */
2163 if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0)
2164 continue;
2165 if (TREE_CODE (use) != SSA_NAME)
2166 return p;
2167 /* If we know when operand is constant,
2168 we still can say something useful. */
2169 if (nonconstant_names[SSA_NAME_VERSION (use)] != true)
2170 continue;
2171 return p;
2174 if (is_load)
2175 op_non_const =
2176 add_condition (summary, params_summary,
2177 base_index, param_type, &aggpos,
2178 ipa_predicate::changed, NULL_TREE);
2179 else
2180 op_non_const = false;
2181 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2183 tree parm = unmodified_parm (fbi, stmt, use, NULL);
2184 int index;
2186 if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0)
2188 if (index != base_index)
2189 p = add_condition (summary, params_summary, index,
2190 TREE_TYPE (parm), NULL,
2191 ipa_predicate::changed, NULL_TREE);
2192 else
2193 continue;
2195 else
2196 p = nonconstant_names[SSA_NAME_VERSION (use)];
2197 op_non_const = p.or_with (summary->conds, op_non_const);
2199 if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL)
2200 && gimple_op (stmt, 0)
2201 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2202 nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))]
2203 = op_non_const;
2204 return op_non_const;
2207 struct record_modified_bb_info
2209 tree op;
2210 bitmap bb_set;
2211 gimple *stmt;
2214 /* Value is initialized in INIT_BB and used in USE_BB. We want to compute
2215 probability how often it changes between USE_BB.
2216 INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB
2217 is in different loop nest, we can do better.
2218 This is all just estimate. In theory we look for minimal cut separating
2219 INIT_BB and USE_BB, but we only want to anticipate loop invariant motion
2220 anyway. */
2222 static basic_block
2223 get_minimal_bb (basic_block init_bb, basic_block use_bb)
2225 class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father);
2226 if (l && l->header->count < init_bb->count)
2227 return l->header;
2228 return init_bb;
2231 /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be
2232 set except for info->stmt. */
2234 static bool
2235 record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data)
2237 struct record_modified_bb_info *info =
2238 (struct record_modified_bb_info *) data;
2239 if (SSA_NAME_DEF_STMT (vdef) == info->stmt)
2240 return false;
2241 if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef)))
2242 return false;
2243 bitmap_set_bit (info->bb_set,
2244 SSA_NAME_IS_DEFAULT_DEF (vdef)
2245 ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index
2246 : get_minimal_bb
2247 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2248 gimple_bb (info->stmt))->index);
2249 if (dump_file)
2251 fprintf (dump_file, " Param ");
2252 print_generic_expr (dump_file, info->op, TDF_SLIM);
2253 fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ",
2254 gimple_bb (SSA_NAME_DEF_STMT (vdef))->index,
2255 get_minimal_bb
2256 (gimple_bb (SSA_NAME_DEF_STMT (vdef)),
2257 gimple_bb (info->stmt))->index);
2258 print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0);
2260 return false;
2263 /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT
2264 will change since last invocation of STMT.
2266 Value 0 is reserved for compile time invariants.
2267 For common parameters it is REG_BR_PROB_BASE. For loop invariants it
2268 ought to be REG_BR_PROB_BASE / estimated_iters. */
2270 static int
2271 param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i)
2273 tree op = gimple_call_arg (stmt, i);
2274 basic_block bb = gimple_bb (stmt);
2276 if (TREE_CODE (op) == WITH_SIZE_EXPR)
2277 op = TREE_OPERAND (op, 0);
2279 tree base = get_base_address (op);
2281 /* Global invariants never change. */
2282 if (is_gimple_min_invariant (base))
2283 return 0;
2285 /* We would have to do non-trivial analysis to really work out what
2286 is the probability of value to change (i.e. when init statement
2287 is in a sibling loop of the call).
2289 We do an conservative estimate: when call is executed N times more often
2290 than the statement defining value, we take the frequency 1/N. */
2291 if (TREE_CODE (base) == SSA_NAME)
2293 profile_count init_count;
2295 if (!bb->count.nonzero_p ())
2296 return REG_BR_PROB_BASE;
2298 if (SSA_NAME_IS_DEFAULT_DEF (base))
2299 init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2300 else
2301 init_count = get_minimal_bb
2302 (gimple_bb (SSA_NAME_DEF_STMT (base)),
2303 gimple_bb (stmt))->count;
2305 if (init_count < bb->count)
2306 return MAX ((init_count.to_sreal_scale (bb->count)
2307 * REG_BR_PROB_BASE).to_int (), 1);
2308 return REG_BR_PROB_BASE;
2310 else
2312 ao_ref refd;
2313 profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2314 struct record_modified_bb_info info;
2315 tree init = ctor_for_folding (base);
2317 if (init != error_mark_node)
2318 return 0;
2319 if (!bb->count.nonzero_p () || fbi->aa_walk_budget == 0)
2320 return REG_BR_PROB_BASE;
2321 if (dump_file)
2323 fprintf (dump_file, " Analyzing param change probability of ");
2324 print_generic_expr (dump_file, op, TDF_SLIM);
2325 fprintf (dump_file, "\n");
2327 ao_ref_init (&refd, op);
2328 info.op = op;
2329 info.stmt = stmt;
2330 info.bb_set = BITMAP_ALLOC (NULL);
2331 int walked
2332 = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info,
2333 NULL, NULL, fbi->aa_walk_budget);
2334 if (walked > 0)
2335 fbi->aa_walk_budget -= walked;
2336 if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index))
2338 if (walked < 0)
2339 fbi->aa_walk_budget = 0;
2340 if (dump_file)
2342 if (walked < 0)
2343 fprintf (dump_file, " Ran out of AA walking budget.\n");
2344 else
2345 fprintf (dump_file, " Set in same BB as used.\n");
2347 BITMAP_FREE (info.bb_set);
2348 return REG_BR_PROB_BASE;
2351 bitmap_iterator bi;
2352 unsigned index;
2353 /* Lookup the most frequent update of the value and believe that
2354 it dominates all the other; precise analysis here is difficult. */
2355 EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi)
2356 max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count);
2357 if (dump_file)
2359 fprintf (dump_file, " Set with count ");
2360 max.dump (dump_file);
2361 fprintf (dump_file, " and used with count ");
2362 bb->count.dump (dump_file);
2363 fprintf (dump_file, " freq %f\n",
2364 max.to_sreal_scale (bb->count).to_double ());
2367 BITMAP_FREE (info.bb_set);
2368 if (max < bb->count)
2369 return MAX ((max.to_sreal_scale (bb->count)
2370 * REG_BR_PROB_BASE).to_int (), 1);
2371 return REG_BR_PROB_BASE;
2375 /* Find whether a basic block BB is the final block of a (half) diamond CFG
2376 sub-graph and if the predicate the condition depends on is known. If so,
2377 return true and store the pointer the predicate in *P. */
2379 static bool
2380 phi_result_unknown_predicate (ipa_func_body_info *fbi,
2381 ipa_fn_summary *summary,
2382 class ipa_node_params *params_summary,
2383 basic_block bb,
2384 ipa_predicate *p,
2385 vec<ipa_predicate> nonconstant_names)
2387 edge e;
2388 edge_iterator ei;
2389 basic_block first_bb = NULL;
2391 if (single_pred_p (bb))
2393 *p = false;
2394 return true;
2397 FOR_EACH_EDGE (e, ei, bb->preds)
2399 if (single_succ_p (e->src))
2401 if (!single_pred_p (e->src))
2402 return false;
2403 if (!first_bb)
2404 first_bb = single_pred (e->src);
2405 else if (single_pred (e->src) != first_bb)
2406 return false;
2408 else
2410 if (!first_bb)
2411 first_bb = e->src;
2412 else if (e->src != first_bb)
2413 return false;
2417 if (!first_bb)
2418 return false;
2420 gcond *stmt = safe_dyn_cast <gcond *> (*gsi_last_bb (first_bb));
2421 if (!stmt
2422 || !is_gimple_ip_invariant (gimple_cond_rhs (stmt)))
2423 return false;
2425 *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary,
2426 gimple_cond_lhs (stmt),
2427 nonconstant_names);
2428 if (*p == true)
2429 return false;
2430 else
2431 return true;
2434 /* Given a PHI statement in a function described by inline properties SUMMARY
2435 and *P being the predicate describing whether the selected PHI argument is
2436 known, store a predicate for the result of the PHI statement into
2437 NONCONSTANT_NAMES, if possible. */
2439 static void
2440 predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi,
2441 ipa_predicate *p,
2442 vec<ipa_predicate> nonconstant_names)
2444 unsigned i;
2446 for (i = 0; i < gimple_phi_num_args (phi); i++)
2448 tree arg = gimple_phi_arg (phi, i)->def;
2449 if (!is_gimple_min_invariant (arg))
2451 gcc_assert (TREE_CODE (arg) == SSA_NAME);
2452 *p = p->or_with (summary->conds,
2453 nonconstant_names[SSA_NAME_VERSION (arg)]);
2454 if (*p == true)
2455 return;
2459 if (dump_file && (dump_flags & TDF_DETAILS))
2461 fprintf (dump_file, "\t\tphi predicate: ");
2462 p->dump (dump_file, summary->conds);
2464 nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p;
2467 /* For a typical usage of __builtin_expect (a<b, 1), we
2468 may introduce an extra relation stmt:
2469 With the builtin, we have
2470 t1 = a <= b;
2471 t2 = (long int) t1;
2472 t3 = __builtin_expect (t2, 1);
2473 if (t3 != 0)
2474 goto ...
2475 Without the builtin, we have
2476 if (a<=b)
2477 goto...
2478 This affects the size/time estimation and may have
2479 an impact on the earlier inlining.
2480 Here find this pattern and fix it up later. */
2482 static gimple *
2483 find_foldable_builtin_expect (basic_block bb)
2485 gimple_stmt_iterator bsi;
2487 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2489 gimple *stmt = gsi_stmt (bsi);
2490 if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT)
2491 || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY)
2492 || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT))
2494 tree var = gimple_call_lhs (stmt);
2495 tree arg = gimple_call_arg (stmt, 0);
2496 use_operand_p use_p;
2497 gimple *use_stmt;
2498 bool match = false;
2499 bool done = false;
2501 if (!var || !arg)
2502 continue;
2503 gcc_assert (TREE_CODE (var) == SSA_NAME);
2505 while (TREE_CODE (arg) == SSA_NAME)
2507 gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg);
2508 if (!is_gimple_assign (stmt_tmp))
2509 break;
2510 switch (gimple_assign_rhs_code (stmt_tmp))
2512 case LT_EXPR:
2513 case LE_EXPR:
2514 case GT_EXPR:
2515 case GE_EXPR:
2516 case EQ_EXPR:
2517 case NE_EXPR:
2518 match = true;
2519 done = true;
2520 break;
2521 CASE_CONVERT:
2522 break;
2523 default:
2524 done = true;
2525 break;
2527 if (done)
2528 break;
2529 arg = gimple_assign_rhs1 (stmt_tmp);
2532 if (match && single_imm_use (var, &use_p, &use_stmt)
2533 && gimple_code (use_stmt) == GIMPLE_COND)
2534 return use_stmt;
2537 return NULL;
2540 /* Return true when the basic blocks contains only clobbers followed by RESX.
2541 Such BBs are kept around to make removal of dead stores possible with
2542 presence of EH and will be optimized out by optimize_clobbers later in the
2543 game.
2545 NEED_EH is used to recurse in case the clobber has non-EH predecessors
2546 that can be clobber only, too.. When it is false, the RESX is not necessary
2547 on the end of basic block. */
2549 static bool
2550 clobber_only_eh_bb_p (basic_block bb, bool need_eh = true)
2552 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2553 edge_iterator ei;
2554 edge e;
2556 if (need_eh)
2558 if (gsi_end_p (gsi))
2559 return false;
2560 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX)
2561 return false;
2562 gsi_prev (&gsi);
2564 else if (!single_succ_p (bb))
2565 return false;
2567 for (; !gsi_end_p (gsi); gsi_prev (&gsi))
2569 gimple *stmt = gsi_stmt (gsi);
2570 if (is_gimple_debug (stmt))
2571 continue;
2572 if (gimple_clobber_p (stmt))
2573 continue;
2574 if (gimple_code (stmt) == GIMPLE_LABEL)
2575 break;
2576 return false;
2579 /* See if all predecessors are either throws or clobber only BBs. */
2580 FOR_EACH_EDGE (e, ei, bb->preds)
2581 if (!(e->flags & EDGE_EH)
2582 && !clobber_only_eh_bb_p (e->src, false))
2583 return false;
2585 return true;
2588 /* Return true if STMT compute a floating point expression that may be affected
2589 by -ffast-math and similar flags. */
2591 static bool
2592 fp_expression_p (gimple *stmt)
2594 ssa_op_iter i;
2595 tree op;
2597 FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE)
2598 if (FLOAT_TYPE_P (TREE_TYPE (op)))
2599 return true;
2600 return false;
2603 /* Return true if T references memory location that is local
2604 for the function (that means, dead after return) or read-only. */
2606 bool
2607 refs_local_or_readonly_memory_p (tree t)
2609 /* Non-escaping memory is fine. */
2610 t = get_base_address (t);
2611 if ((TREE_CODE (t) == MEM_REF
2612 || TREE_CODE (t) == TARGET_MEM_REF))
2613 return points_to_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2615 /* Automatic variables are fine. */
2616 if (DECL_P (t)
2617 && auto_var_in_fn_p (t, current_function_decl))
2618 return true;
2620 /* Read-only variables are fine. */
2621 if (DECL_P (t) && TREE_READONLY (t))
2622 return true;
2624 return false;
2627 /* Return true if T is a pointer pointing to memory location that is local
2628 for the function (that means, dead after return) or read-only. */
2630 bool
2631 points_to_local_or_readonly_memory_p (tree t)
2633 /* See if memory location is clearly invalid. */
2634 if (integer_zerop (t))
2635 return flag_delete_null_pointer_checks;
2636 if (TREE_CODE (t) == SSA_NAME)
2638 /* For IPA passes we can consinder accesses to return slot local
2639 even if it is not local in the sense that memory is dead by
2640 the end of founction.
2641 The outer function will see a store in the call assignment
2642 and thus this will do right thing for all uses of this
2643 function in the current IPA passes (modref, pure/const discovery
2644 and inlining heuristics). */
2645 if (DECL_RESULT (current_function_decl)
2646 && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
2647 && t == ssa_default_def (cfun, DECL_RESULT (current_function_decl)))
2648 return true;
2649 return !ptr_deref_may_alias_global_p (t, false);
2651 if (TREE_CODE (t) == ADDR_EXPR
2652 && (TREE_CODE (TREE_OPERAND (t, 0)) != TARGET_MEM_REF
2653 || TREE_CODE (TREE_OPERAND (TREE_OPERAND (t, 0), 0)) != INTEGER_CST))
2654 return refs_local_or_readonly_memory_p (TREE_OPERAND (t, 0));
2655 return false;
2658 /* Return true if T is a pointer pointing to memory location that is possible
2659 sra candidate if all functions it is passed to are inlined. */
2661 static bool
2662 points_to_possible_sra_candidate_p (tree t)
2664 if (TREE_CODE (t) != ADDR_EXPR)
2665 return false;
2667 t = get_base_address (TREE_OPERAND (t, 0));
2669 /* Automatic variables are fine. */
2670 if (DECL_P (t)
2671 && auto_var_in_fn_p (t, current_function_decl))
2672 return true;
2673 return false;
2676 /* Analyze function body for NODE.
2677 EARLY indicates run from early optimization pipeline. */
2679 static void
2680 analyze_function_body (struct cgraph_node *node, bool early)
2682 sreal time = opt_for_fn (node->decl, param_uninlined_function_time);
2683 /* Estimate static overhead for function prologue/epilogue and alignment. */
2684 int size = opt_for_fn (node->decl, param_uninlined_function_insns);
2685 /* Benefits are scaled by probability of elimination that is in range
2686 <0,2>. */
2687 basic_block bb;
2688 struct function *my_function = DECL_STRUCT_FUNCTION (node->decl);
2689 sreal freq;
2690 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
2691 ipa_node_params *params_summary
2692 = early ? NULL : ipa_node_params_sum->get (node);
2693 ipa_predicate bb_predicate;
2694 struct ipa_func_body_info fbi;
2695 vec<ipa_predicate> nonconstant_names = vNULL;
2696 int nblocks, n;
2697 int *order;
2698 gimple *fix_builtin_expect_stmt;
2700 gcc_assert (my_function && my_function->cfg);
2701 gcc_assert (cfun == my_function);
2703 memset(&fbi, 0, sizeof(fbi));
2704 vec_free (info->conds);
2705 info->conds = NULL;
2706 info->size_time_table.release ();
2707 info->call_size_time_table.release ();
2709 /* When optimizing and analyzing for IPA inliner, initialize loop optimizer
2710 so we can produce proper inline hints.
2712 When optimizing and analyzing for early inliner, initialize node params
2713 so we can produce correct BB predicates. */
2715 if (opt_for_fn (node->decl, optimize))
2717 calculate_dominance_info (CDI_DOMINATORS);
2718 calculate_dominance_info (CDI_POST_DOMINATORS);
2719 if (!early)
2720 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
2721 else
2723 ipa_check_create_node_params ();
2724 ipa_initialize_node_params (node);
2727 if (ipa_node_params_sum)
2729 fbi.node = node;
2730 fbi.info = ipa_node_params_sum->get (node);
2731 fbi.bb_infos = vNULL;
2732 fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
2733 fbi.param_count = count_formal_params (node->decl);
2734 fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps);
2736 nonconstant_names.safe_grow_cleared
2737 (SSANAMES (my_function)->length (), true);
2741 if (dump_file)
2742 fprintf (dump_file, "\nAnalyzing function body size: %s\n",
2743 node->dump_name ());
2745 /* When we run into maximal number of entries, we assign everything to the
2746 constant truth case. Be sure to have it in list. */
2747 bb_predicate = true;
2748 info->account_size_time (0, 0, bb_predicate, bb_predicate);
2750 bb_predicate = ipa_predicate::not_inlined ();
2751 info->account_size_time (opt_for_fn (node->decl,
2752 param_uninlined_function_insns)
2753 * ipa_fn_summary::size_scale,
2754 opt_for_fn (node->decl,
2755 param_uninlined_function_time),
2756 bb_predicate,
2757 bb_predicate);
2759 /* Only look for target information for inlinable functions. */
2760 bool scan_for_target_info =
2761 info->inlinable
2762 && targetm.target_option.need_ipa_fn_target_info (node->decl,
2763 info->target_info);
2765 if (fbi.info)
2766 compute_bb_predicates (&fbi, node, info, params_summary);
2767 const profile_count entry_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
2768 order = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
2769 nblocks = pre_and_rev_post_order_compute (NULL, order, false);
2770 for (n = 0; n < nblocks; n++)
2772 bb = BASIC_BLOCK_FOR_FN (cfun, order[n]);
2773 freq = bb->count.to_sreal_scale (entry_count);
2774 if (clobber_only_eh_bb_p (bb))
2776 if (dump_file && (dump_flags & TDF_DETAILS))
2777 fprintf (dump_file, "\n Ignoring BB %i;"
2778 " it will be optimized away by cleanup_clobbers\n",
2779 bb->index);
2780 continue;
2783 /* TODO: Obviously predicates can be propagated down across CFG. */
2784 if (fbi.info)
2786 if (bb->aux)
2787 bb_predicate = *(ipa_predicate *)bb->aux;
2788 else
2789 bb_predicate = false;
2791 else
2792 bb_predicate = true;
2794 if (dump_file && (dump_flags & TDF_DETAILS))
2796 fprintf (dump_file, "\n BB %i predicate:", bb->index);
2797 bb_predicate.dump (dump_file, info->conds);
2800 if (fbi.info && nonconstant_names.exists ())
2802 ipa_predicate phi_predicate;
2803 bool first_phi = true;
2805 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
2806 gsi_next (&bsi))
2808 if (first_phi
2809 && !phi_result_unknown_predicate (&fbi, info,
2810 params_summary,
2812 &phi_predicate,
2813 nonconstant_names))
2814 break;
2815 first_phi = false;
2816 if (dump_file && (dump_flags & TDF_DETAILS))
2818 fprintf (dump_file, " ");
2819 print_gimple_stmt (dump_file, gsi_stmt (bsi), 0);
2821 predicate_for_phi_result (info, bsi.phi (), &phi_predicate,
2822 nonconstant_names);
2826 fix_builtin_expect_stmt = find_foldable_builtin_expect (bb);
2828 for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb);
2829 !gsi_end_p (bsi); gsi_next_nondebug (&bsi))
2831 gimple *stmt = gsi_stmt (bsi);
2832 int this_size = estimate_num_insns (stmt, &eni_size_weights);
2833 int this_time = estimate_num_insns (stmt, &eni_time_weights);
2834 int prob;
2835 ipa_predicate will_be_nonconstant;
2837 /* This relation stmt should be folded after we remove
2838 __builtin_expect call. Adjust the cost here. */
2839 if (stmt == fix_builtin_expect_stmt)
2841 this_size--;
2842 this_time--;
2845 if (dump_file && (dump_flags & TDF_DETAILS))
2847 fprintf (dump_file, " ");
2848 print_gimple_stmt (dump_file, stmt, 0);
2849 fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n",
2850 freq.to_double (), this_size,
2851 this_time);
2854 if (is_gimple_call (stmt)
2855 && !gimple_call_internal_p (stmt))
2857 struct cgraph_edge *edge = node->get_edge (stmt);
2858 ipa_call_summary *es = ipa_call_summaries->get_create (edge);
2860 /* Special case: results of BUILT_IN_CONSTANT_P will be always
2861 resolved as constant. We however don't want to optimize
2862 out the cgraph edges. */
2863 if (nonconstant_names.exists ()
2864 && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P)
2865 && gimple_call_lhs (stmt)
2866 && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME)
2868 ipa_predicate false_p = false;
2869 nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))]
2870 = false_p;
2872 if (ipa_node_params_sum)
2874 int count = gimple_call_num_args (stmt);
2875 int i;
2877 if (count)
2878 es->param.safe_grow_cleared (count, true);
2879 for (i = 0; i < count; i++)
2881 int prob = param_change_prob (&fbi, stmt, i);
2882 gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE);
2883 es->param[i].change_prob = prob;
2884 es->param[i].points_to_local_or_readonly_memory
2885 = points_to_local_or_readonly_memory_p
2886 (gimple_call_arg (stmt, i));
2887 es->param[i].points_to_possible_sra_candidate
2888 = points_to_possible_sra_candidate_p
2889 (gimple_call_arg (stmt, i));
2892 /* We cannot setup VLA parameters during inlining. */
2893 for (unsigned int i = 0; i < gimple_call_num_args (stmt); ++i)
2894 if (TREE_CODE (gimple_call_arg (stmt, i)) == WITH_SIZE_EXPR)
2896 edge->inline_failed = CIF_FUNCTION_NOT_INLINABLE;
2897 break;
2899 es->call_stmt_size = this_size;
2900 es->call_stmt_time = this_time;
2901 es->loop_depth = bb_loop_depth (bb);
2902 edge_set_predicate (edge, &bb_predicate);
2903 if (edge->speculative)
2905 cgraph_edge *indirect
2906 = edge->speculative_call_indirect_edge ();
2907 ipa_call_summary *es2
2908 = ipa_call_summaries->get_create (indirect);
2909 ipa_call_summaries->duplicate (edge, indirect,
2910 es, es2);
2912 /* Edge is the first direct call.
2913 create and duplicate call summaries for multiple
2914 speculative call targets. */
2915 for (cgraph_edge *direct
2916 = edge->next_speculative_call_target ();
2917 direct;
2918 direct = direct->next_speculative_call_target ())
2920 ipa_call_summary *es3
2921 = ipa_call_summaries->get_create (direct);
2922 ipa_call_summaries->duplicate (edge, direct,
2923 es, es3);
2928 /* TODO: When conditional jump or switch is known to be constant, but
2929 we did not translate it into the predicates, we really can account
2930 just maximum of the possible paths. */
2931 if (fbi.info)
2932 will_be_nonconstant
2933 = will_be_nonconstant_predicate (&fbi, info, params_summary,
2934 stmt, nonconstant_names);
2935 else
2936 will_be_nonconstant = true;
2937 if (this_time || this_size)
2939 sreal final_time = (sreal)this_time * freq;
2940 prob = eliminated_by_inlining_prob (&fbi, stmt);
2941 if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS))
2942 fprintf (dump_file,
2943 "\t\t50%% will be eliminated by inlining\n");
2944 if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS))
2945 fprintf (dump_file, "\t\tWill be eliminated by inlining\n");
2947 ipa_predicate p = bb_predicate & will_be_nonconstant;
2948 int parm = load_or_store_of_ptr_parameter (&fbi, stmt);
2949 ipa_predicate sra_predicate = true;
2950 if (parm != -1)
2951 sra_predicate &= add_condition (info, params_summary, parm,
2952 ptr_type_node, NULL,
2953 ipa_predicate::not_sra_candidate, NULL, 0);
2955 /* We can ignore statement when we proved it is never going
2956 to happen, but we cannot do that for call statements
2957 because edges are accounted specially. */
2959 if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false)
2961 time += final_time;
2962 size += this_size;
2965 /* We account everything but the calls. Calls have their own
2966 size/time info attached to cgraph edges. This is necessary
2967 in order to make the cost disappear after inlining. */
2968 if (!is_gimple_call (stmt))
2970 if (prob)
2972 ipa_predicate ip
2973 = bb_predicate & ipa_predicate::not_inlined () & sra_predicate;
2974 info->account_size_time (this_size * prob,
2975 (final_time * prob) / 2, ip,
2978 if (prob != 2)
2979 info->account_size_time (this_size * (2 - prob),
2980 (final_time * (2 - prob) / 2),
2981 bb_predicate & sra_predicate,
2985 if (!info->fp_expressions && fp_expression_p (stmt))
2987 info->fp_expressions = true;
2988 if (dump_file)
2989 fprintf (dump_file, " fp_expression set\n");
2993 /* For target specific information, we want to scan all statements
2994 rather than those statements with non-zero weights, to avoid
2995 missing to scan something interesting for target information,
2996 such as: internal function calls. */
2997 if (scan_for_target_info)
2998 scan_for_target_info =
2999 targetm.target_option.update_ipa_fn_target_info
3000 (info->target_info, stmt);
3002 /* Account cost of address calculations in the statements. */
3003 for (unsigned int i = 0; i < gimple_num_ops (stmt); i++)
3005 for (tree op = gimple_op (stmt, i);
3006 op && handled_component_p (op);
3007 op = TREE_OPERAND (op, 0))
3008 if ((TREE_CODE (op) == ARRAY_REF
3009 || TREE_CODE (op) == ARRAY_RANGE_REF)
3010 && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME)
3012 ipa_predicate p = bb_predicate;
3013 if (fbi.info)
3014 p = p & will_be_nonconstant_expr_predicate
3015 (&fbi, info, params_summary,
3016 TREE_OPERAND (op, 1),
3017 nonconstant_names);
3018 if (p != false)
3020 time += freq;
3021 size += 1;
3022 if (dump_file)
3023 fprintf (dump_file,
3024 "\t\tAccounting address calculation.\n");
3025 info->account_size_time (ipa_fn_summary::size_scale,
3026 freq,
3027 bb_predicate,
3035 free (order);
3037 if (nonconstant_names.exists () && !early)
3039 ipa_fn_summary *s = ipa_fn_summaries->get (node);
3040 unsigned max_loop_predicates = opt_for_fn (node->decl,
3041 param_ipa_max_loop_predicates);
3043 if (dump_file && (dump_flags & TDF_DETAILS))
3044 flow_loops_dump (dump_file, NULL, 0);
3045 scev_initialize ();
3046 for (auto loop : loops_list (cfun, 0))
3048 ipa_predicate loop_iterations = true;
3049 sreal header_freq;
3050 edge ex;
3051 unsigned int j;
3052 class tree_niter_desc niter_desc;
3053 if (!loop->header->aux)
3054 continue;
3056 profile_count phdr_count = loop_preheader_edge (loop)->count ();
3057 sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
3059 bb_predicate = *(ipa_predicate *)loop->header->aux;
3060 auto_vec<edge> exits = get_loop_exit_edges (loop);
3061 FOR_EACH_VEC_ELT (exits, j, ex)
3062 if (number_of_iterations_exit (loop, ex, &niter_desc, false)
3063 && !is_gimple_min_invariant (niter_desc.niter))
3065 ipa_predicate will_be_nonconstant
3066 = will_be_nonconstant_expr_predicate (&fbi, info,
3067 params_summary,
3068 niter_desc.niter,
3069 nonconstant_names);
3070 if (will_be_nonconstant != true)
3071 will_be_nonconstant = bb_predicate & will_be_nonconstant;
3072 if (will_be_nonconstant != true
3073 && will_be_nonconstant != false)
3074 loop_iterations &= will_be_nonconstant;
3076 add_freqcounting_predicate (&s->loop_iterations, loop_iterations,
3077 phdr_freq, max_loop_predicates);
3080 /* To avoid quadratic behavior we analyze stride predicates only
3081 with respect to the containing loop. Thus we simply iterate
3082 over all defs in the outermost loop body. */
3083 for (class loop *loop = loops_for_fn (cfun)->tree_root->inner;
3084 loop != NULL; loop = loop->next)
3086 ipa_predicate loop_stride = true;
3087 basic_block *body = get_loop_body (loop);
3088 profile_count phdr_count = loop_preheader_edge (loop)->count ();
3089 sreal phdr_freq = phdr_count.to_sreal_scale (entry_count);
3090 for (unsigned i = 0; i < loop->num_nodes; i++)
3092 gimple_stmt_iterator gsi;
3093 if (!body[i]->aux)
3094 continue;
3096 bb_predicate = *(ipa_predicate *)body[i]->aux;
3097 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi);
3098 gsi_next (&gsi))
3100 gimple *stmt = gsi_stmt (gsi);
3102 if (!is_gimple_assign (stmt))
3103 continue;
3105 tree def = gimple_assign_lhs (stmt);
3106 if (TREE_CODE (def) != SSA_NAME)
3107 continue;
3109 affine_iv iv;
3110 if (!simple_iv (loop_containing_stmt (stmt),
3111 loop_containing_stmt (stmt),
3112 def, &iv, true)
3113 || is_gimple_min_invariant (iv.step))
3114 continue;
3116 ipa_predicate will_be_nonconstant
3117 = will_be_nonconstant_expr_predicate (&fbi, info,
3118 params_summary,
3119 iv.step,
3120 nonconstant_names);
3121 if (will_be_nonconstant != true)
3122 will_be_nonconstant = bb_predicate & will_be_nonconstant;
3123 if (will_be_nonconstant != true
3124 && will_be_nonconstant != false)
3125 loop_stride = loop_stride & will_be_nonconstant;
3128 add_freqcounting_predicate (&s->loop_strides, loop_stride,
3129 phdr_freq, max_loop_predicates);
3130 free (body);
3132 scev_finalize ();
3134 FOR_ALL_BB_FN (bb, my_function)
3136 edge e;
3137 edge_iterator ei;
3139 if (bb->aux)
3140 edge_predicate_pool.remove ((ipa_predicate *)bb->aux);
3141 bb->aux = NULL;
3142 FOR_EACH_EDGE (e, ei, bb->succs)
3144 if (e->aux)
3145 edge_predicate_pool.remove ((ipa_predicate *)e->aux);
3146 e->aux = NULL;
3149 ipa_fn_summary *s = ipa_fn_summaries->get (node);
3150 ipa_size_summary *ss = ipa_size_summaries->get (node);
3151 s->time = time;
3152 ss->self_size = size;
3153 nonconstant_names.release ();
3154 ipa_release_body_info (&fbi);
3155 if (opt_for_fn (node->decl, optimize))
3157 if (!early)
3158 loop_optimizer_finalize ();
3159 else if (!ipa_edge_args_sum)
3160 ipa_free_all_node_params ();
3161 free_dominance_info (CDI_DOMINATORS);
3162 free_dominance_info (CDI_POST_DOMINATORS);
3164 if (dump_file)
3166 fprintf (dump_file, "\n");
3167 ipa_dump_fn_summary (dump_file, node);
3172 /* Compute function summary.
3173 EARLY is true when we compute parameters during early opts. */
3175 void
3176 compute_fn_summary (struct cgraph_node *node, bool early)
3178 HOST_WIDE_INT self_stack_size;
3179 struct cgraph_edge *e;
3181 gcc_assert (!node->inlined_to);
3183 if (!ipa_fn_summaries)
3184 ipa_fn_summary_alloc ();
3186 /* Create a new ipa_fn_summary. */
3187 ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node);
3188 ipa_fn_summaries->remove (node);
3189 class ipa_fn_summary *info = ipa_fn_summaries->get_create (node);
3190 class ipa_size_summary *size_info = ipa_size_summaries->get_create (node);
3192 /* Estimate the stack size for the function if we're optimizing. */
3193 self_stack_size = optimize && !node->thunk
3194 ? estimated_stack_frame_size (node) : 0;
3195 size_info->estimated_self_stack_size = self_stack_size;
3196 info->estimated_stack_size = self_stack_size;
3198 if (node->thunk)
3200 ipa_call_summary *es = ipa_call_summaries->get_create (node->callees);
3201 ipa_predicate t = true;
3203 node->can_change_signature = false;
3204 es->call_stmt_size = eni_size_weights.call_cost;
3205 es->call_stmt_time = eni_time_weights.call_cost;
3206 info->account_size_time (ipa_fn_summary::size_scale
3207 * opt_for_fn (node->decl,
3208 param_uninlined_function_thunk_insns),
3209 opt_for_fn (node->decl,
3210 param_uninlined_function_thunk_time), t, t);
3211 t = ipa_predicate::not_inlined ();
3212 info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t);
3213 ipa_update_overall_fn_summary (node);
3214 size_info->self_size = size_info->size;
3215 if (stdarg_p (TREE_TYPE (node->decl)))
3217 info->inlinable = false;
3218 node->callees->inline_failed = CIF_VARIADIC_THUNK;
3220 else
3221 info->inlinable = true;
3223 else
3225 /* Even is_gimple_min_invariant rely on current_function_decl. */
3226 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
3228 /* During IPA profile merging we may be called w/o virtual SSA form
3229 built. */
3230 update_ssa (TODO_update_ssa_only_virtuals);
3232 /* Can this function be inlined at all? */
3233 if (!opt_for_fn (node->decl, optimize)
3234 && !lookup_attribute ("always_inline",
3235 DECL_ATTRIBUTES (node->decl)))
3236 info->inlinable = false;
3237 else
3238 info->inlinable = tree_inlinable_function_p (node->decl);
3240 bool no_signature = false;
3241 /* Type attributes can use parameter indices to describe them.
3242 Special case fn spec since we can safely preserve them in
3243 modref summaries. */
3244 for (tree list = TYPE_ATTRIBUTES (TREE_TYPE (node->decl));
3245 list && !no_signature; list = TREE_CHAIN (list))
3246 if (!ipa_param_adjustments::type_attribute_allowed_p
3247 (get_attribute_name (list)))
3249 if (dump_file)
3251 fprintf (dump_file, "No signature change:"
3252 " function type has unhandled attribute %s.\n",
3253 IDENTIFIER_POINTER (get_attribute_name (list)));
3255 no_signature = true;
3257 for (tree parm = DECL_ARGUMENTS (node->decl);
3258 parm && !no_signature; parm = DECL_CHAIN (parm))
3259 if (variably_modified_type_p (TREE_TYPE (parm), node->decl))
3261 if (dump_file)
3263 fprintf (dump_file, "No signature change:"
3264 " has parameter with variably modified type.\n");
3266 no_signature = true;
3269 /* Likewise for #pragma omp declare simd functions or functions
3270 with simd attribute. */
3271 if (no_signature
3272 || lookup_attribute ("omp declare simd",
3273 DECL_ATTRIBUTES (node->decl)))
3274 node->can_change_signature = false;
3275 else
3277 /* Otherwise, inlinable functions always can change signature. */
3278 if (info->inlinable)
3279 node->can_change_signature = true;
3280 else
3282 /* Functions calling builtin_apply cannot change signature. */
3283 for (e = node->callees; e; e = e->next_callee)
3285 tree cdecl = e->callee->decl;
3286 if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS,
3287 BUILT_IN_VA_START))
3288 break;
3290 node->can_change_signature = !e;
3293 analyze_function_body (node, early);
3294 pop_cfun ();
3297 /* Inlining characteristics are maintained by the cgraph_mark_inline. */
3298 size_info->size = size_info->self_size;
3299 info->estimated_stack_size = size_info->estimated_self_stack_size;
3301 /* Code above should compute exactly the same result as
3302 ipa_update_overall_fn_summary except for case when speculative
3303 edges are present since these are accounted to size but not
3304 self_size. Do not compare time since different order the roundoff
3305 errors result in slight changes. */
3306 ipa_update_overall_fn_summary (node);
3307 if (flag_checking)
3309 for (e = node->indirect_calls; e; e = e->next_callee)
3310 if (e->speculative)
3311 break;
3312 gcc_assert (e || size_info->size == size_info->self_size);
3317 /* Compute parameters of functions used by inliner using
3318 current_function_decl. */
3320 static unsigned int
3321 compute_fn_summary_for_current (void)
3323 compute_fn_summary (cgraph_node::get (current_function_decl), true);
3324 return 0;
3327 /* Estimate benefit devirtualizing indirect edge IE and return true if it can
3328 be devirtualized and inlined, provided m_known_vals, m_known_contexts and
3329 m_known_aggs in AVALS. Return false straight away if AVALS is NULL. */
3331 static bool
3332 estimate_edge_devirt_benefit (struct cgraph_edge *ie,
3333 int *size, int *time,
3334 ipa_call_arg_values *avals)
3336 tree target;
3337 struct cgraph_node *callee;
3338 class ipa_fn_summary *isummary;
3339 enum availability avail;
3340 bool speculative;
3342 if (!avals
3343 || (!avals->m_known_vals.length() && !avals->m_known_contexts.length ()))
3344 return false;
3345 if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining))
3346 return false;
3348 target = ipa_get_indirect_edge_target (ie, avals, &speculative);
3349 if (!target || speculative)
3350 return false;
3352 /* Account for difference in cost between indirect and direct calls. */
3353 *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost);
3354 *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost);
3355 gcc_checking_assert (*time >= 0);
3356 gcc_checking_assert (*size >= 0);
3358 callee = cgraph_node::get (target);
3359 if (!callee || !callee->definition)
3360 return false;
3361 callee = callee->function_symbol (&avail);
3362 if (avail < AVAIL_AVAILABLE)
3363 return false;
3364 isummary = ipa_fn_summaries->get (callee);
3365 if (isummary == NULL)
3366 return false;
3368 return isummary->inlinable;
3371 /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to
3372 handle edge E with probability PROB. Set HINTS accordingly if edge may be
3373 devirtualized. AVALS, if non-NULL, describes the context of the call site
3374 as far as values of parameters are concerened. */
3376 static inline void
3377 estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size,
3378 sreal *time, ipa_call_arg_values *avals,
3379 ipa_hints *hints)
3381 class ipa_call_summary *es = ipa_call_summaries->get (e);
3382 int call_size = es->call_stmt_size;
3383 int call_time = es->call_stmt_time;
3384 int cur_size;
3386 if (!e->callee && hints && e->maybe_hot_p ()
3387 && estimate_edge_devirt_benefit (e, &call_size, &call_time, avals))
3388 *hints |= INLINE_HINT_indirect_call;
3389 cur_size = call_size * ipa_fn_summary::size_scale;
3390 *size += cur_size;
3391 if (min_size)
3392 *min_size += cur_size;
3393 if (time)
3394 *time += ((sreal)call_time) * e->sreal_frequency ();
3398 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3399 calls in NODE. POSSIBLE_TRUTHS and AVALS describe the context of the call
3400 site.
3402 Helper for estimate_calls_size_and_time which does the same but
3403 (in most cases) faster. */
3405 static void
3406 estimate_calls_size_and_time_1 (struct cgraph_node *node, int *size,
3407 int *min_size, sreal *time,
3408 ipa_hints *hints,
3409 clause_t possible_truths,
3410 ipa_call_arg_values *avals)
3412 struct cgraph_edge *e;
3413 for (e = node->callees; e; e = e->next_callee)
3415 if (!e->inline_failed)
3417 gcc_checking_assert (!ipa_call_summaries->get (e));
3418 estimate_calls_size_and_time_1 (e->callee, size, min_size, time,
3419 hints, possible_truths, avals);
3421 continue;
3423 class ipa_call_summary *es = ipa_call_summaries->get (e);
3425 /* Do not care about zero sized builtins. */
3426 if (!es->call_stmt_size)
3428 gcc_checking_assert (!es->call_stmt_time);
3429 continue;
3431 if (!es->predicate
3432 || es->predicate->evaluate (possible_truths))
3434 /* Predicates of calls shall not use NOT_CHANGED codes,
3435 so we do not need to compute probabilities. */
3436 estimate_edge_size_and_time (e, size,
3437 es->predicate ? NULL : min_size,
3438 time, avals, hints);
3441 for (e = node->indirect_calls; e; e = e->next_callee)
3443 class ipa_call_summary *es = ipa_call_summaries->get (e);
3444 if (!es->predicate
3445 || es->predicate->evaluate (possible_truths))
3446 estimate_edge_size_and_time (e, size,
3447 es->predicate ? NULL : min_size,
3448 time, avals, hints);
3452 /* Populate sum->call_size_time_table for edges from NODE. */
3454 static void
3455 summarize_calls_size_and_time (struct cgraph_node *node,
3456 ipa_fn_summary *sum)
3458 struct cgraph_edge *e;
3459 for (e = node->callees; e; e = e->next_callee)
3461 if (!e->inline_failed)
3463 gcc_checking_assert (!ipa_call_summaries->get (e));
3464 summarize_calls_size_and_time (e->callee, sum);
3465 continue;
3467 int size = 0;
3468 sreal time = 0;
3470 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3472 ipa_predicate pred = true;
3473 class ipa_call_summary *es = ipa_call_summaries->get (e);
3475 if (es->predicate)
3476 pred = *es->predicate;
3477 sum->account_size_time (size, time, pred, pred, true);
3479 for (e = node->indirect_calls; e; e = e->next_callee)
3481 int size = 0;
3482 sreal time = 0;
3484 estimate_edge_size_and_time (e, &size, NULL, &time, NULL, NULL);
3485 ipa_predicate pred = true;
3486 class ipa_call_summary *es = ipa_call_summaries->get (e);
3488 if (es->predicate)
3489 pred = *es->predicate;
3490 sum->account_size_time (size, time, pred, pred, true);
3494 /* Increase SIZE, MIN_SIZE and TIME for size and time needed to handle all
3495 calls in NODE. POSSIBLE_TRUTHS and AVALS (the latter if non-NULL) describe
3496 context of the call site. */
3498 static void
3499 estimate_calls_size_and_time (struct cgraph_node *node, int *size,
3500 int *min_size, sreal *time,
3501 ipa_hints *hints,
3502 clause_t possible_truths,
3503 ipa_call_arg_values *avals)
3505 class ipa_fn_summary *sum = ipa_fn_summaries->get (node);
3506 bool use_table = true;
3508 gcc_assert (node->callees || node->indirect_calls);
3510 /* During early inlining we do not calculate info for very
3511 large functions and thus there is no need for producing
3512 summaries. */
3513 if (!ipa_node_params_sum)
3514 use_table = false;
3515 /* Do not calculate summaries for simple wrappers; it is waste
3516 of memory. */
3517 else if (node->callees && node->indirect_calls
3518 && node->callees->inline_failed && !node->callees->next_callee)
3519 use_table = false;
3520 /* If there is an indirect edge that may be optimized, we need
3521 to go the slow way. */
3522 else if (avals && hints
3523 && (avals->m_known_vals.length ()
3524 || avals->m_known_contexts.length ()
3525 || avals->m_known_aggs.length ()))
3527 ipa_node_params *params_summary = ipa_node_params_sum->get (node);
3528 unsigned int nargs = params_summary
3529 ? ipa_get_param_count (params_summary) : 0;
3531 for (unsigned int i = 0; i < nargs && use_table; i++)
3533 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3534 && (avals->safe_sval_at (i)
3535 || (ipa_argagg_value_list (avals).value_for_index_p (i))))
3536 use_table = false;
3537 else if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3538 && (avals->m_known_contexts.length () > i
3539 && !avals->m_known_contexts[i].useless_p ()))
3540 use_table = false;
3544 /* Fast path is via the call size time table. */
3545 if (use_table)
3547 /* Build summary if it is absent. */
3548 if (!sum->call_size_time_table.length ())
3550 ipa_predicate true_pred = true;
3551 sum->account_size_time (0, 0, true_pred, true_pred, true);
3552 summarize_calls_size_and_time (node, sum);
3555 int old_size = *size;
3556 sreal old_time = time ? *time : 0;
3558 if (min_size)
3559 *min_size += sum->call_size_time_table[0].size;
3561 unsigned int i;
3562 size_time_entry *e;
3564 /* Walk the table and account sizes and times. */
3565 for (i = 0; sum->call_size_time_table.iterate (i, &e);
3566 i++)
3567 if (e->exec_predicate.evaluate (possible_truths))
3569 *size += e->size;
3570 if (time)
3571 *time += e->time;
3574 /* Be careful and see if both methods agree. */
3575 if ((flag_checking || dump_file)
3576 /* Do not try to sanity check when we know we lost some
3577 precision. */
3578 && sum->call_size_time_table.length ()
3579 < ipa_fn_summary::max_size_time_table_size)
3581 estimate_calls_size_and_time_1 (node, &old_size, NULL, &old_time, NULL,
3582 possible_truths, avals);
3583 gcc_assert (*size == old_size);
3584 if (time && (*time - old_time > 1 || *time - old_time < -1)
3585 && dump_file)
3586 fprintf (dump_file, "Time mismatch in call summary %f!=%f\n",
3587 old_time.to_double (),
3588 time->to_double ());
3591 /* Slow path by walking all edges. */
3592 else
3593 estimate_calls_size_and_time_1 (node, size, min_size, time, hints,
3594 possible_truths, avals);
3597 /* Main constructor for ipa call context. Memory allocation of ARG_VALUES
3598 is owned by the caller. INLINE_PARAM_SUMMARY is also owned by the
3599 caller. */
3601 ipa_call_context::ipa_call_context (cgraph_node *node, clause_t possible_truths,
3602 clause_t nonspec_possible_truths,
3603 vec<inline_param_summary>
3604 inline_param_summary,
3605 ipa_auto_call_arg_values *arg_values)
3606 : m_node (node), m_possible_truths (possible_truths),
3607 m_nonspec_possible_truths (nonspec_possible_truths),
3608 m_inline_param_summary (inline_param_summary),
3609 m_avals (arg_values)
3613 /* Set THIS to be a duplicate of CTX. Copy all relevant info. */
3615 void
3616 ipa_cached_call_context::duplicate_from (const ipa_call_context &ctx)
3618 m_node = ctx.m_node;
3619 m_possible_truths = ctx.m_possible_truths;
3620 m_nonspec_possible_truths = ctx.m_nonspec_possible_truths;
3621 ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
3622 unsigned int nargs = params_summary
3623 ? ipa_get_param_count (params_summary) : 0;
3625 m_inline_param_summary = vNULL;
3626 /* Copy the info only if there is at least one useful entry. */
3627 if (ctx.m_inline_param_summary.exists ())
3629 unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs);
3631 for (unsigned int i = 0; i < n; i++)
3632 if (ipa_is_param_used_by_ipa_predicates (params_summary, i)
3633 && !ctx.m_inline_param_summary[i].useless_p ())
3635 m_inline_param_summary
3636 = ctx.m_inline_param_summary.copy ();
3637 break;
3640 m_avals.m_known_vals = vNULL;
3641 if (ctx.m_avals.m_known_vals.exists ())
3643 unsigned int n = MIN (ctx.m_avals.m_known_vals.length (), nargs);
3645 for (unsigned int i = 0; i < n; i++)
3646 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3647 && ctx.m_avals.m_known_vals[i])
3649 m_avals.m_known_vals = ctx.m_avals.m_known_vals.copy ();
3650 break;
3654 m_avals.m_known_contexts = vNULL;
3655 if (ctx.m_avals.m_known_contexts.exists ())
3657 unsigned int n = MIN (ctx.m_avals.m_known_contexts.length (), nargs);
3659 for (unsigned int i = 0; i < n; i++)
3660 if (ipa_is_param_used_by_polymorphic_call (params_summary, i)
3661 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3663 m_avals.m_known_contexts = ctx.m_avals.m_known_contexts.copy ();
3664 break;
3668 m_avals.m_known_aggs = vNULL;
3669 if (ctx.m_avals.m_known_aggs.exists ())
3671 const ipa_argagg_value_list avl (&ctx.m_avals);
3672 for (unsigned int i = 0; i < nargs; i++)
3673 if (ipa_is_param_used_by_indirect_call (params_summary, i)
3674 && avl.value_for_index_p (i))
3676 m_avals.m_known_aggs = ctx.m_avals.m_known_aggs.copy ();
3677 break;
3681 m_avals.m_known_value_ranges = vNULL;
3684 /* Release memory used by known_vals/contexts/aggs vectors. and
3685 inline_param_summary. */
3687 void
3688 ipa_cached_call_context::release ()
3690 /* See if context is initialized at first place. */
3691 if (!m_node)
3692 return;
3693 m_avals.m_known_aggs.release ();
3694 m_avals.m_known_vals.release ();
3695 m_avals.m_known_contexts.release ();
3696 m_inline_param_summary.release ();
3699 /* Return true if CTX describes the same call context as THIS. */
3701 bool
3702 ipa_call_context::equal_to (const ipa_call_context &ctx)
3704 if (m_node != ctx.m_node
3705 || m_possible_truths != ctx.m_possible_truths
3706 || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths)
3707 return false;
3709 ipa_node_params *params_summary = ipa_node_params_sum->get (m_node);
3710 unsigned int nargs = params_summary
3711 ? ipa_get_param_count (params_summary) : 0;
3713 if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ())
3715 for (unsigned int i = 0; i < nargs; i++)
3717 if (!ipa_is_param_used_by_ipa_predicates (params_summary, i))
3718 continue;
3719 if (i >= m_inline_param_summary.length ()
3720 || m_inline_param_summary[i].useless_p ())
3722 if (i < ctx.m_inline_param_summary.length ()
3723 && !ctx.m_inline_param_summary[i].useless_p ())
3724 return false;
3725 continue;
3727 if (i >= ctx.m_inline_param_summary.length ()
3728 || ctx.m_inline_param_summary[i].useless_p ())
3730 if (i < m_inline_param_summary.length ()
3731 && !m_inline_param_summary[i].useless_p ())
3732 return false;
3733 continue;
3735 if (!m_inline_param_summary[i].equal_to
3736 (ctx.m_inline_param_summary[i]))
3737 return false;
3740 if (m_avals.m_known_vals.exists () || ctx.m_avals.m_known_vals.exists ())
3742 for (unsigned int i = 0; i < nargs; i++)
3744 if (!ipa_is_param_used_by_indirect_call (params_summary, i))
3745 continue;
3746 if (i >= m_avals.m_known_vals.length () || !m_avals.m_known_vals[i])
3748 if (i < ctx.m_avals.m_known_vals.length ()
3749 && ctx.m_avals.m_known_vals[i])
3750 return false;
3751 continue;
3753 if (i >= ctx.m_avals.m_known_vals.length ()
3754 || !ctx.m_avals.m_known_vals[i])
3756 if (i < m_avals.m_known_vals.length () && m_avals.m_known_vals[i])
3757 return false;
3758 continue;
3760 if (m_avals.m_known_vals[i] != ctx.m_avals.m_known_vals[i])
3761 return false;
3764 if (m_avals.m_known_contexts.exists ()
3765 || ctx.m_avals.m_known_contexts.exists ())
3767 for (unsigned int i = 0; i < nargs; i++)
3769 if (!ipa_is_param_used_by_polymorphic_call (params_summary, i))
3770 continue;
3771 if (i >= m_avals.m_known_contexts.length ()
3772 || m_avals.m_known_contexts[i].useless_p ())
3774 if (i < ctx.m_avals.m_known_contexts.length ()
3775 && !ctx.m_avals.m_known_contexts[i].useless_p ())
3776 return false;
3777 continue;
3779 if (i >= ctx.m_avals.m_known_contexts.length ()
3780 || ctx.m_avals.m_known_contexts[i].useless_p ())
3782 if (i < m_avals.m_known_contexts.length ()
3783 && !m_avals.m_known_contexts[i].useless_p ())
3784 return false;
3785 continue;
3787 if (!m_avals.m_known_contexts[i].equal_to
3788 (ctx.m_avals.m_known_contexts[i]))
3789 return false;
3792 if (m_avals.m_known_aggs.exists () || ctx.m_avals.m_known_aggs.exists ())
3794 unsigned i = 0, j = 0;
3795 while (i < m_avals.m_known_aggs.length ()
3796 || j < ctx.m_avals.m_known_aggs.length ())
3798 if (i >= m_avals.m_known_aggs.length ())
3800 int idx2 = ctx.m_avals.m_known_aggs[j].index;
3801 if (ipa_is_param_used_by_indirect_call (params_summary, idx2))
3802 return false;
3803 j++;
3804 continue;
3806 if (j >= ctx.m_avals.m_known_aggs.length ())
3808 int idx1 = m_avals.m_known_aggs[i].index;
3809 if (ipa_is_param_used_by_indirect_call (params_summary, idx1))
3810 return false;
3811 i++;
3812 continue;
3815 int idx1 = m_avals.m_known_aggs[i].index;
3816 int idx2 = ctx.m_avals.m_known_aggs[j].index;
3817 if (idx1 < idx2)
3819 if (ipa_is_param_used_by_indirect_call (params_summary, idx1))
3820 return false;
3821 i++;
3822 continue;
3824 if (idx1 > idx2)
3826 if (ipa_is_param_used_by_indirect_call (params_summary, idx2))
3827 return false;
3828 j++;
3829 continue;
3831 if (!ipa_is_param_used_by_indirect_call (params_summary, idx1))
3833 i++;
3834 j++;
3835 continue;
3838 if ((m_avals.m_known_aggs[i].unit_offset
3839 != ctx.m_avals.m_known_aggs[j].unit_offset)
3840 || (m_avals.m_known_aggs[i].by_ref
3841 != ctx.m_avals.m_known_aggs[j].by_ref)
3842 || !operand_equal_p (m_avals.m_known_aggs[i].value,
3843 ctx.m_avals.m_known_aggs[j].value))
3844 return false;
3845 i++;
3846 j++;
3849 return true;
3852 /* Fill in the selected fields in ESTIMATES with value estimated for call in
3853 this context. Always compute size and min_size. Only compute time and
3854 nonspecialized_time if EST_TIMES is true. Only compute hints if EST_HINTS
3855 is true. */
3857 void
3858 ipa_call_context::estimate_size_and_time (ipa_call_estimates *estimates,
3859 bool est_times, bool est_hints)
3861 class ipa_fn_summary *info = ipa_fn_summaries->get (m_node);
3862 size_time_entry *e;
3863 int size = 0;
3864 sreal time = 0;
3865 int min_size = 0;
3866 ipa_hints hints = 0;
3867 sreal loops_with_known_iterations = 0;
3868 sreal loops_with_known_strides = 0;
3869 int i;
3871 if (dump_file && (dump_flags & TDF_DETAILS))
3873 bool found = false;
3874 fprintf (dump_file, " Estimating body: %s\n"
3875 " Known to be false: ", m_node->dump_name ());
3877 for (i = ipa_predicate::not_inlined_condition;
3878 i < (ipa_predicate::first_dynamic_condition
3879 + (int) vec_safe_length (info->conds)); i++)
3880 if (!(m_possible_truths & (1 << i)))
3882 if (found)
3883 fprintf (dump_file, ", ");
3884 found = true;
3885 dump_condition (dump_file, info->conds, i);
3889 if (m_node->callees || m_node->indirect_calls)
3890 estimate_calls_size_and_time (m_node, &size, &min_size,
3891 est_times ? &time : NULL,
3892 est_hints ? &hints : NULL, m_possible_truths,
3893 &m_avals);
3895 sreal nonspecialized_time = time;
3897 min_size += info->size_time_table[0].size;
3898 for (i = 0; info->size_time_table.iterate (i, &e); i++)
3900 bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths);
3902 /* Because predicates are conservative, it can happen that nonconst is 1
3903 but exec is 0. */
3904 if (exec)
3906 bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths);
3908 gcc_checking_assert (e->time >= 0);
3909 gcc_checking_assert (time >= 0);
3911 /* We compute specialized size only because size of nonspecialized
3912 copy is context independent.
3914 The difference between nonspecialized execution and specialized is
3915 that nonspecialized is not going to have optimized out computations
3916 known to be constant in a specialized setting. */
3917 if (nonconst)
3918 size += e->size;
3919 if (!est_times)
3920 continue;
3921 nonspecialized_time += e->time;
3922 if (!nonconst)
3924 else if (!m_inline_param_summary.exists ())
3926 if (nonconst)
3927 time += e->time;
3929 else
3931 int prob = e->nonconst_predicate.probability
3932 (info->conds, m_possible_truths,
3933 m_inline_param_summary);
3934 gcc_checking_assert (prob >= 0);
3935 gcc_checking_assert (prob <= REG_BR_PROB_BASE);
3936 if (prob == REG_BR_PROB_BASE)
3937 time += e->time;
3938 else
3939 time += e->time * prob / REG_BR_PROB_BASE;
3941 gcc_checking_assert (time >= 0);
3944 gcc_checking_assert (info->size_time_table[0].exec_predicate == true);
3945 gcc_checking_assert (info->size_time_table[0].nonconst_predicate == true);
3946 gcc_checking_assert (min_size >= 0);
3947 gcc_checking_assert (size >= 0);
3948 gcc_checking_assert (time >= 0);
3949 /* nonspecialized_time should be always bigger than specialized time.
3950 Roundoff issues however may get into the way. */
3951 gcc_checking_assert ((nonspecialized_time - time * 99 / 100) >= -1);
3953 /* Roundoff issues may make specialized time bigger than nonspecialized
3954 time. We do not really want that to happen because some heuristics
3955 may get confused by seeing negative speedups. */
3956 if (time > nonspecialized_time)
3957 time = nonspecialized_time;
3959 if (est_hints)
3961 if (info->scc_no)
3962 hints |= INLINE_HINT_in_scc;
3963 if (DECL_DECLARED_INLINE_P (m_node->decl))
3964 hints |= INLINE_HINT_declared_inline;
3965 if (info->builtin_constant_p_parms.length ()
3966 && DECL_DECLARED_INLINE_P (m_node->decl))
3967 hints |= INLINE_HINT_builtin_constant_p;
3969 ipa_freqcounting_predicate *fcp;
3970 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
3971 if (!fcp->predicate->evaluate (m_possible_truths))
3973 hints |= INLINE_HINT_loop_iterations;
3974 loops_with_known_iterations += fcp->freq;
3976 estimates->loops_with_known_iterations = loops_with_known_iterations;
3978 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
3979 if (!fcp->predicate->evaluate (m_possible_truths))
3981 hints |= INLINE_HINT_loop_stride;
3982 loops_with_known_strides += fcp->freq;
3984 estimates->loops_with_known_strides = loops_with_known_strides;
3987 size = RDIV (size, ipa_fn_summary::size_scale);
3988 min_size = RDIV (min_size, ipa_fn_summary::size_scale);
3990 if (dump_file && (dump_flags & TDF_DETAILS))
3992 fprintf (dump_file, "\n size:%i", (int) size);
3993 if (est_times)
3994 fprintf (dump_file, " time:%f nonspec time:%f",
3995 time.to_double (), nonspecialized_time.to_double ());
3996 if (est_hints)
3997 fprintf (dump_file, " loops with known iterations:%f "
3998 "known strides:%f", loops_with_known_iterations.to_double (),
3999 loops_with_known_strides.to_double ());
4000 fprintf (dump_file, "\n");
4002 if (est_times)
4004 estimates->time = time;
4005 estimates->nonspecialized_time = nonspecialized_time;
4007 estimates->size = size;
4008 estimates->min_size = min_size;
4009 if (est_hints)
4010 estimates->hints = hints;
4011 return;
4015 /* Estimate size and time needed to execute callee of EDGE assuming that
4016 parameters known to be constant at caller of EDGE are propagated.
4017 KNOWN_VALS and KNOWN_CONTEXTS are vectors of assumed known constant values
4018 and types for parameters. */
4020 void
4021 estimate_ipcp_clone_size_and_time (struct cgraph_node *node,
4022 ipa_auto_call_arg_values *avals,
4023 ipa_call_estimates *estimates)
4025 clause_t clause, nonspec_clause;
4027 evaluate_conditions_for_known_args (node, false, avals, &clause,
4028 &nonspec_clause, NULL);
4029 ipa_call_context ctx (node, clause, nonspec_clause, vNULL, avals);
4030 ctx.estimate_size_and_time (estimates);
4033 /* Return stack frame offset where frame of NODE is supposed to start inside
4034 of the function it is inlined to.
4035 Return 0 for functions that are not inlined. */
4037 HOST_WIDE_INT
4038 ipa_get_stack_frame_offset (struct cgraph_node *node)
4040 HOST_WIDE_INT offset = 0;
4041 if (!node->inlined_to)
4042 return 0;
4043 node = node->callers->caller;
4044 while (true)
4046 offset += ipa_size_summaries->get (node)->estimated_self_stack_size;
4047 if (!node->inlined_to)
4048 return offset;
4049 node = node->callers->caller;
4054 /* Update summary information of inline clones after inlining.
4055 Compute peak stack usage. */
4057 static void
4058 inline_update_callee_summaries (struct cgraph_node *node, int depth)
4060 struct cgraph_edge *e;
4062 ipa_propagate_frequency (node);
4063 for (e = node->callees; e; e = e->next_callee)
4065 if (!e->inline_failed)
4066 inline_update_callee_summaries (e->callee, depth);
4067 else
4068 ipa_call_summaries->get (e)->loop_depth += depth;
4070 for (e = node->indirect_calls; e; e = e->next_callee)
4071 ipa_call_summaries->get (e)->loop_depth += depth;
4074 /* Update change_prob and points_to_local_or_readonly_memory of EDGE after
4075 INLINED_EDGE has been inlined.
4077 When function A is inlined in B and A calls C with parameter that
4078 changes with probability PROB1 and C is known to be passthrough
4079 of argument if B that change with probability PROB2, the probability
4080 of change is now PROB1*PROB2. */
4082 static void
4083 remap_edge_params (struct cgraph_edge *inlined_edge,
4084 struct cgraph_edge *edge)
4086 if (ipa_node_params_sum)
4088 int i;
4089 ipa_edge_args *args = ipa_edge_args_sum->get (edge);
4090 if (!args)
4091 return;
4092 class ipa_call_summary *es = ipa_call_summaries->get (edge);
4093 class ipa_call_summary *inlined_es
4094 = ipa_call_summaries->get (inlined_edge);
4096 if (es->param.length () == 0)
4097 return;
4099 for (i = 0; i < ipa_get_cs_argument_count (args); i++)
4101 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4102 if (jfunc->type == IPA_JF_PASS_THROUGH
4103 || jfunc->type == IPA_JF_ANCESTOR)
4105 int id = jfunc->type == IPA_JF_PASS_THROUGH
4106 ? ipa_get_jf_pass_through_formal_id (jfunc)
4107 : ipa_get_jf_ancestor_formal_id (jfunc);
4108 if (id < (int) inlined_es->param.length ())
4110 int prob1 = es->param[i].change_prob;
4111 int prob2 = inlined_es->param[id].change_prob;
4112 int prob = combine_probabilities (prob1, prob2);
4114 if (prob1 && prob2 && !prob)
4115 prob = 1;
4117 es->param[i].change_prob = prob;
4119 if (inlined_es
4120 ->param[id].points_to_local_or_readonly_memory)
4121 es->param[i].points_to_local_or_readonly_memory = true;
4122 if (inlined_es
4123 ->param[id].points_to_possible_sra_candidate)
4124 es->param[i].points_to_possible_sra_candidate = true;
4126 if (!es->param[i].points_to_local_or_readonly_memory
4127 && jfunc->type == IPA_JF_CONST
4128 && points_to_local_or_readonly_memory_p
4129 (ipa_get_jf_constant (jfunc)))
4130 es->param[i].points_to_local_or_readonly_memory = true;
4136 /* Update edge summaries of NODE after INLINED_EDGE has been inlined.
4138 Remap predicates of callees of NODE. Rest of arguments match
4139 remap_predicate.
4141 Also update change probabilities. */
4143 static void
4144 remap_edge_summaries (struct cgraph_edge *inlined_edge,
4145 struct cgraph_node *node,
4146 class ipa_fn_summary *info,
4147 class ipa_node_params *params_summary,
4148 class ipa_fn_summary *callee_info,
4149 const vec<int> &operand_map,
4150 const vec<HOST_WIDE_INT> &offset_map,
4151 clause_t possible_truths,
4152 ipa_predicate *toplev_predicate)
4154 struct cgraph_edge *e, *next;
4155 for (e = node->callees; e; e = next)
4157 ipa_predicate p;
4158 next = e->next_callee;
4160 if (e->inline_failed)
4162 class ipa_call_summary *es = ipa_call_summaries->get (e);
4163 remap_edge_params (inlined_edge, e);
4165 if (es->predicate)
4167 p = es->predicate->remap_after_inlining
4168 (info, params_summary,
4169 callee_info, operand_map,
4170 offset_map, possible_truths,
4171 *toplev_predicate);
4172 edge_set_predicate (e, &p);
4174 else
4175 edge_set_predicate (e, toplev_predicate);
4177 else
4178 remap_edge_summaries (inlined_edge, e->callee, info,
4179 params_summary, callee_info,
4180 operand_map, offset_map, possible_truths,
4181 toplev_predicate);
4183 for (e = node->indirect_calls; e; e = next)
4185 class ipa_call_summary *es = ipa_call_summaries->get (e);
4186 ipa_predicate p;
4187 next = e->next_callee;
4189 remap_edge_params (inlined_edge, e);
4190 if (es->predicate)
4192 p = es->predicate->remap_after_inlining
4193 (info, params_summary,
4194 callee_info, operand_map, offset_map,
4195 possible_truths, *toplev_predicate);
4196 edge_set_predicate (e, &p);
4198 else
4199 edge_set_predicate (e, toplev_predicate);
4203 /* Run remap_after_inlining on each predicate in V. */
4205 static void
4206 remap_freqcounting_predicate (class ipa_fn_summary *info,
4207 class ipa_node_params *params_summary,
4208 class ipa_fn_summary *callee_info,
4209 vec<ipa_freqcounting_predicate, va_gc> *v,
4210 const vec<int> &operand_map,
4211 const vec<HOST_WIDE_INT> &offset_map,
4212 clause_t possible_truths,
4213 ipa_predicate *toplev_predicate)
4216 ipa_freqcounting_predicate *fcp;
4217 for (int i = 0; vec_safe_iterate (v, i, &fcp); i++)
4219 ipa_predicate p
4220 = fcp->predicate->remap_after_inlining (info, params_summary,
4221 callee_info, operand_map,
4222 offset_map, possible_truths,
4223 *toplev_predicate);
4224 if (p != false && p != true)
4225 *fcp->predicate &= p;
4229 /* We inlined EDGE. Update summary of the function we inlined into. */
4231 void
4232 ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge)
4234 ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee);
4235 struct cgraph_node *to = (edge->caller->inlined_to
4236 ? edge->caller->inlined_to : edge->caller);
4237 class ipa_fn_summary *info = ipa_fn_summaries->get (to);
4238 clause_t clause = 0; /* not_inline is known to be false. */
4239 size_time_entry *e;
4240 auto_vec<int, 8> operand_map;
4241 auto_vec<HOST_WIDE_INT, 8> offset_map;
4242 int i;
4243 ipa_predicate toplev_predicate;
4244 class ipa_call_summary *es = ipa_call_summaries->get (edge);
4245 ipa_node_params *params_summary = (ipa_node_params_sum
4246 ? ipa_node_params_sum->get (to) : NULL);
4248 if (es->predicate)
4249 toplev_predicate = *es->predicate;
4250 else
4251 toplev_predicate = true;
4253 info->fp_expressions |= callee_info->fp_expressions;
4254 info->target_info |= callee_info->target_info;
4256 if (callee_info->conds)
4258 ipa_auto_call_arg_values avals;
4259 evaluate_properties_for_edge (edge, true, &clause, NULL, &avals, false);
4261 if (ipa_node_params_sum && callee_info->conds)
4263 ipa_edge_args *args = ipa_edge_args_sum->get (edge);
4264 int count = args ? ipa_get_cs_argument_count (args) : 0;
4265 int i;
4267 if (count)
4269 operand_map.safe_grow_cleared (count, true);
4270 offset_map.safe_grow_cleared (count, true);
4272 for (i = 0; i < count; i++)
4274 struct ipa_jump_func *jfunc = ipa_get_ith_jump_func (args, i);
4275 int map = -1;
4277 /* TODO: handle non-NOPs when merging. */
4278 if (jfunc->type == IPA_JF_PASS_THROUGH)
4280 if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR)
4281 map = ipa_get_jf_pass_through_formal_id (jfunc);
4282 if (!ipa_get_jf_pass_through_agg_preserved (jfunc))
4283 offset_map[i] = -1;
4285 else if (jfunc->type == IPA_JF_ANCESTOR)
4287 HOST_WIDE_INT offset = ipa_get_jf_ancestor_offset (jfunc);
4288 if (offset >= 0 && offset < INT_MAX)
4290 map = ipa_get_jf_ancestor_formal_id (jfunc);
4291 if (!ipa_get_jf_ancestor_agg_preserved (jfunc))
4292 offset = -1;
4293 offset_map[i] = offset;
4296 operand_map[i] = map;
4297 gcc_assert (map < ipa_get_param_count (params_summary));
4300 int ip;
4301 for (i = 0; callee_info->builtin_constant_p_parms.iterate (i, &ip); i++)
4302 if (ip < count && operand_map[ip] >= 0)
4303 add_builtin_constant_p_parm (info, operand_map[ip]);
4305 sreal freq = edge->sreal_frequency ();
4306 for (i = 0; callee_info->size_time_table.iterate (i, &e); i++)
4308 ipa_predicate p;
4309 p = e->exec_predicate.remap_after_inlining
4310 (info, params_summary,
4311 callee_info, operand_map,
4312 offset_map, clause,
4313 toplev_predicate);
4314 ipa_predicate nonconstp;
4315 nonconstp = e->nonconst_predicate.remap_after_inlining
4316 (info, params_summary,
4317 callee_info, operand_map,
4318 offset_map, clause,
4319 toplev_predicate);
4320 if (p != false && nonconstp != false)
4322 sreal add_time = ((sreal)e->time * freq);
4323 int prob = e->nonconst_predicate.probability (callee_info->conds,
4324 clause, es->param);
4325 if (prob != REG_BR_PROB_BASE)
4326 add_time = add_time * prob / REG_BR_PROB_BASE;
4327 if (prob != REG_BR_PROB_BASE
4328 && dump_file && (dump_flags & TDF_DETAILS))
4330 fprintf (dump_file, "\t\tScaling time by probability:%f\n",
4331 (double) prob / REG_BR_PROB_BASE);
4333 info->account_size_time (e->size, add_time, p, nonconstp);
4336 remap_edge_summaries (edge, edge->callee, info, params_summary,
4337 callee_info, operand_map,
4338 offset_map, clause, &toplev_predicate);
4339 remap_freqcounting_predicate (info, params_summary, callee_info,
4340 info->loop_iterations, operand_map,
4341 offset_map, clause, &toplev_predicate);
4342 remap_freqcounting_predicate (info, params_summary, callee_info,
4343 info->loop_strides, operand_map,
4344 offset_map, clause, &toplev_predicate);
4346 HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee);
4347 HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size;
4349 if (info->estimated_stack_size < peak)
4350 info->estimated_stack_size = peak;
4352 inline_update_callee_summaries (edge->callee, es->loop_depth);
4353 if (info->call_size_time_table.length ())
4355 int edge_size = 0;
4356 sreal edge_time = 0;
4358 estimate_edge_size_and_time (edge, &edge_size, NULL, &edge_time, NULL, 0);
4359 /* Unaccount size and time of the optimized out call. */
4360 info->account_size_time (-edge_size, -edge_time,
4361 es->predicate ? *es->predicate : true,
4362 es->predicate ? *es->predicate : true,
4363 true);
4364 /* Account new calls. */
4365 summarize_calls_size_and_time (edge->callee, info);
4368 /* Free summaries that are not maintained for inline clones/edges. */
4369 ipa_call_summaries->remove (edge);
4370 ipa_fn_summaries->remove (edge->callee);
4371 ipa_remove_from_growth_caches (edge);
4374 /* For performance reasons ipa_merge_fn_summary_after_inlining is not updating
4375 overall size and time. Recompute it.
4376 If RESET is true also recompute call_time_size_table. */
4378 void
4379 ipa_update_overall_fn_summary (struct cgraph_node *node, bool reset)
4381 class ipa_fn_summary *info = ipa_fn_summaries->get (node);
4382 class ipa_size_summary *size_info = ipa_size_summaries->get (node);
4383 size_time_entry *e;
4384 int i;
4386 size_info->size = 0;
4387 info->time = 0;
4388 for (i = 0; info->size_time_table.iterate (i, &e); i++)
4390 size_info->size += e->size;
4391 info->time += e->time;
4393 info->min_size = info->size_time_table[0].size;
4394 if (reset)
4395 info->call_size_time_table.release ();
4396 if (node->callees || node->indirect_calls)
4397 estimate_calls_size_and_time (node, &size_info->size, &info->min_size,
4398 &info->time, NULL,
4399 ~(clause_t) (1 << ipa_predicate::false_condition),
4400 NULL);
4401 size_info->size = RDIV (size_info->size, ipa_fn_summary::size_scale);
4402 info->min_size = RDIV (info->min_size, ipa_fn_summary::size_scale);
4406 /* This function performs intraprocedural analysis in NODE that is required to
4407 inline indirect calls. */
4409 static void
4410 inline_indirect_intraprocedural_analysis (struct cgraph_node *node)
4412 ipa_analyze_node (node);
4413 if (dump_file && (dump_flags & TDF_DETAILS))
4415 ipa_print_node_params (dump_file, node);
4416 ipa_print_node_jump_functions (dump_file, node);
4421 /* Note function body size. */
4423 void
4424 inline_analyze_function (struct cgraph_node *node)
4426 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
4428 if (dump_file)
4429 fprintf (dump_file, "\nAnalyzing function: %s\n", node->dump_name ());
4430 if (opt_for_fn (node->decl, optimize) && !node->thunk)
4431 inline_indirect_intraprocedural_analysis (node);
4432 compute_fn_summary (node, false);
4433 if (!optimize)
4435 struct cgraph_edge *e;
4436 for (e = node->callees; e; e = e->next_callee)
4437 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4438 for (e = node->indirect_calls; e; e = e->next_callee)
4439 e->inline_failed = CIF_FUNCTION_NOT_OPTIMIZED;
4442 pop_cfun ();
4446 /* Called when new function is inserted to callgraph late. */
4448 void
4449 ipa_fn_summary_t::insert (struct cgraph_node *node, ipa_fn_summary *)
4451 inline_analyze_function (node);
4454 /* Note function body size. */
4456 static void
4457 ipa_fn_summary_generate (void)
4459 struct cgraph_node *node;
4461 FOR_EACH_DEFINED_FUNCTION (node)
4462 if (DECL_STRUCT_FUNCTION (node->decl))
4463 node->versionable = tree_versionable_function_p (node->decl);
4465 ipa_fn_summary_alloc ();
4467 ipa_fn_summaries->enable_insertion_hook ();
4469 ipa_register_cgraph_hooks ();
4471 FOR_EACH_DEFINED_FUNCTION (node)
4472 if (!node->alias
4473 && (flag_generate_lto || flag_generate_offload|| flag_wpa
4474 || opt_for_fn (node->decl, optimize)))
4475 inline_analyze_function (node);
4479 /* Write inline summary for edge E to OB. */
4481 static void
4482 read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e,
4483 bool prevails)
4485 class ipa_call_summary *es = prevails
4486 ? ipa_call_summaries->get_create (e) : NULL;
4487 ipa_predicate p;
4488 int length, i;
4490 int size = streamer_read_uhwi (ib);
4491 int time = streamer_read_uhwi (ib);
4492 int depth = streamer_read_uhwi (ib);
4494 if (es)
4496 es->call_stmt_size = size;
4497 es->call_stmt_time = time;
4498 es->loop_depth = depth;
4501 bitpack_d bp = streamer_read_bitpack (ib);
4502 if (es)
4503 es->is_return_callee_uncaptured = bp_unpack_value (&bp, 1);
4504 else
4505 bp_unpack_value (&bp, 1);
4507 p.stream_in (ib);
4508 if (es)
4509 edge_set_predicate (e, &p);
4510 length = streamer_read_uhwi (ib);
4511 if (length && es
4512 && (e->possibly_call_in_translation_unit_p ()
4513 /* Also stream in jump functions to builtins in hope that they
4514 will get fnspecs. */
4515 || fndecl_built_in_p (e->callee->decl, BUILT_IN_NORMAL)))
4517 es->param.safe_grow_cleared (length, true);
4518 for (i = 0; i < length; i++)
4520 es->param[i].change_prob = streamer_read_uhwi (ib);
4521 bitpack_d bp = streamer_read_bitpack (ib);
4522 es->param[i].points_to_local_or_readonly_memory
4523 = bp_unpack_value (&bp, 1);
4524 es->param[i].points_to_possible_sra_candidate
4525 = bp_unpack_value (&bp, 1);
4528 else
4530 for (i = 0; i < length; i++)
4532 streamer_read_uhwi (ib);
4533 streamer_read_uhwi (ib);
4539 /* Stream in inline summaries from the section. */
4541 static void
4542 inline_read_section (struct lto_file_decl_data *file_data, const char *data,
4543 size_t len)
4545 const struct lto_function_header *header =
4546 (const struct lto_function_header *) data;
4547 const int cfg_offset = sizeof (struct lto_function_header);
4548 const int main_offset = cfg_offset + header->cfg_size;
4549 const int string_offset = main_offset + header->main_size;
4550 class data_in *data_in;
4551 unsigned int i, count2, j;
4552 unsigned int f_count;
4554 lto_input_block ib ((const char *) data + main_offset, header->main_size,
4555 file_data);
4557 data_in =
4558 lto_data_in_create (file_data, (const char *) data + string_offset,
4559 header->string_size, vNULL);
4560 f_count = streamer_read_uhwi (&ib);
4561 for (i = 0; i < f_count; i++)
4563 unsigned int index;
4564 struct cgraph_node *node;
4565 class ipa_fn_summary *info;
4566 class ipa_node_params *params_summary;
4567 class ipa_size_summary *size_info;
4568 lto_symtab_encoder_t encoder;
4569 struct bitpack_d bp;
4570 struct cgraph_edge *e;
4571 ipa_predicate p;
4573 index = streamer_read_uhwi (&ib);
4574 encoder = file_data->symtab_node_encoder;
4575 node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder,
4576 index));
4577 info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL;
4578 params_summary = node->prevailing_p ()
4579 ? ipa_node_params_sum->get (node) : NULL;
4580 size_info = node->prevailing_p ()
4581 ? ipa_size_summaries->get_create (node) : NULL;
4583 int stack_size = streamer_read_uhwi (&ib);
4584 int size = streamer_read_uhwi (&ib);
4585 sreal time = sreal::stream_in (&ib);
4587 if (info)
4589 info->estimated_stack_size
4590 = size_info->estimated_self_stack_size = stack_size;
4591 size_info->size = size_info->self_size = size;
4592 info->time = time;
4595 bp = streamer_read_bitpack (&ib);
4596 if (info)
4598 info->inlinable = bp_unpack_value (&bp, 1);
4599 info->fp_expressions = bp_unpack_value (&bp, 1);
4600 if (!lto_stream_offload_p)
4601 info->target_info = streamer_read_uhwi (&ib);
4603 else
4605 bp_unpack_value (&bp, 1);
4606 bp_unpack_value (&bp, 1);
4607 if (!lto_stream_offload_p)
4608 streamer_read_uhwi (&ib);
4611 count2 = streamer_read_uhwi (&ib);
4612 gcc_assert (!info || !info->conds);
4613 if (info)
4614 vec_safe_reserve_exact (info->conds, count2);
4615 for (j = 0; j < count2; j++)
4617 struct condition c;
4618 unsigned int k, count3;
4619 c.operand_num = streamer_read_uhwi (&ib);
4620 c.code = (enum tree_code) streamer_read_uhwi (&ib);
4621 c.type = stream_read_tree (&ib, data_in);
4622 c.val = stream_read_tree (&ib, data_in);
4623 bp = streamer_read_bitpack (&ib);
4624 c.agg_contents = bp_unpack_value (&bp, 1);
4625 c.by_ref = bp_unpack_value (&bp, 1);
4626 if (c.agg_contents)
4627 c.offset = streamer_read_uhwi (&ib);
4628 count3 = streamer_read_uhwi (&ib);
4629 c.param_ops = NULL;
4630 if (info)
4631 vec_safe_reserve_exact (c.param_ops, count3);
4632 if (params_summary)
4633 ipa_set_param_used_by_ipa_predicates
4634 (params_summary, c.operand_num, true);
4635 for (k = 0; k < count3; k++)
4637 struct expr_eval_op op;
4638 enum gimple_rhs_class rhs_class;
4639 op.code = (enum tree_code) streamer_read_uhwi (&ib);
4640 op.type = stream_read_tree (&ib, data_in);
4641 switch (rhs_class = get_gimple_rhs_class (op.code))
4643 case GIMPLE_UNARY_RHS:
4644 op.index = 0;
4645 op.val[0] = NULL_TREE;
4646 op.val[1] = NULL_TREE;
4647 break;
4649 case GIMPLE_BINARY_RHS:
4650 case GIMPLE_TERNARY_RHS:
4651 bp = streamer_read_bitpack (&ib);
4652 op.index = bp_unpack_value (&bp, 2);
4653 op.val[0] = stream_read_tree (&ib, data_in);
4654 if (rhs_class == GIMPLE_BINARY_RHS)
4655 op.val[1] = NULL_TREE;
4656 else
4657 op.val[1] = stream_read_tree (&ib, data_in);
4658 break;
4660 default:
4661 fatal_error (UNKNOWN_LOCATION,
4662 "invalid fnsummary in LTO stream");
4664 if (info)
4665 c.param_ops->quick_push (op);
4667 if (info)
4668 info->conds->quick_push (c);
4670 count2 = streamer_read_uhwi (&ib);
4671 gcc_assert (!info || !info->size_time_table.length ());
4672 if (info && count2)
4673 info->size_time_table.reserve_exact (count2);
4674 for (j = 0; j < count2; j++)
4676 class size_time_entry e;
4678 e.size = streamer_read_uhwi (&ib);
4679 e.time = sreal::stream_in (&ib);
4680 e.exec_predicate.stream_in (&ib);
4681 e.nonconst_predicate.stream_in (&ib);
4683 if (info)
4684 info->size_time_table.quick_push (e);
4687 count2 = streamer_read_uhwi (&ib);
4688 for (j = 0; j < count2; j++)
4690 p.stream_in (&ib);
4691 sreal fcp_freq = sreal::stream_in (&ib);
4692 if (info)
4694 ipa_freqcounting_predicate fcp;
4695 fcp.predicate = NULL;
4696 set_hint_predicate (&fcp.predicate, p);
4697 fcp.freq = fcp_freq;
4698 vec_safe_push (info->loop_iterations, fcp);
4701 count2 = streamer_read_uhwi (&ib);
4702 for (j = 0; j < count2; j++)
4704 p.stream_in (&ib);
4705 sreal fcp_freq = sreal::stream_in (&ib);
4706 if (info)
4708 ipa_freqcounting_predicate fcp;
4709 fcp.predicate = NULL;
4710 set_hint_predicate (&fcp.predicate, p);
4711 fcp.freq = fcp_freq;
4712 vec_safe_push (info->loop_strides, fcp);
4715 count2 = streamer_read_uhwi (&ib);
4716 if (info && count2)
4717 info->builtin_constant_p_parms.reserve_exact (count2);
4718 for (j = 0; j < count2; j++)
4720 int parm = streamer_read_uhwi (&ib);
4721 if (info)
4722 info->builtin_constant_p_parms.quick_push (parm);
4724 for (e = node->callees; e; e = e->next_callee)
4725 read_ipa_call_summary (&ib, e, info != NULL);
4726 for (e = node->indirect_calls; e; e = e->next_callee)
4727 read_ipa_call_summary (&ib, e, info != NULL);
4730 lto_free_section_data (file_data, LTO_section_ipa_fn_summary, NULL, data,
4731 len);
4732 lto_data_in_delete (data_in);
4736 /* Read inline summary. Jump functions are shared among ipa-cp
4737 and inliner, so when ipa-cp is active, we don't need to write them
4738 twice. */
4740 static void
4741 ipa_fn_summary_read (void)
4743 struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data ();
4744 struct lto_file_decl_data *file_data;
4745 unsigned int j = 0;
4747 ipa_prop_read_jump_functions ();
4748 ipa_fn_summary_alloc ();
4750 while ((file_data = file_data_vec[j++]))
4752 size_t len;
4753 const char *data
4754 = lto_get_summary_section_data (file_data, LTO_section_ipa_fn_summary,
4755 &len);
4756 if (data)
4757 inline_read_section (file_data, data, len);
4758 else
4759 /* Fatal error here. We do not want to support compiling ltrans units
4760 with different version of compiler or different flags than the WPA
4761 unit, so this should never happen. */
4762 fatal_error (input_location,
4763 "ipa inline summary is missing in input file");
4765 ipa_register_cgraph_hooks ();
4767 gcc_assert (ipa_fn_summaries);
4768 ipa_fn_summaries->enable_insertion_hook ();
4772 /* Write inline summary for edge E to OB. */
4774 static void
4775 write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e)
4777 class ipa_call_summary *es = ipa_call_summaries->get (e);
4778 int i;
4780 streamer_write_uhwi (ob, es->call_stmt_size);
4781 streamer_write_uhwi (ob, es->call_stmt_time);
4782 streamer_write_uhwi (ob, es->loop_depth);
4784 bitpack_d bp = bitpack_create (ob->main_stream);
4785 bp_pack_value (&bp, es->is_return_callee_uncaptured, 1);
4786 streamer_write_bitpack (&bp);
4788 if (es->predicate)
4789 es->predicate->stream_out (ob);
4790 else
4791 streamer_write_uhwi (ob, 0);
4792 streamer_write_uhwi (ob, es->param.length ());
4793 for (i = 0; i < (int) es->param.length (); i++)
4795 streamer_write_uhwi (ob, es->param[i].change_prob);
4796 bp = bitpack_create (ob->main_stream);
4797 bp_pack_value (&bp, es->param[i].points_to_local_or_readonly_memory, 1);
4798 bp_pack_value (&bp, es->param[i].points_to_possible_sra_candidate, 1);
4799 streamer_write_bitpack (&bp);
4804 /* Write inline summary for node in SET.
4805 Jump functions are shared among ipa-cp and inliner, so when ipa-cp is
4806 active, we don't need to write them twice. */
4808 static void
4809 ipa_fn_summary_write (void)
4811 struct output_block *ob = create_output_block (LTO_section_ipa_fn_summary);
4812 lto_symtab_encoder_iterator lsei;
4813 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
4814 unsigned int count = 0;
4816 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4817 lsei_next_function_in_partition (&lsei))
4819 cgraph_node *cnode = lsei_cgraph_node (lsei);
4820 if (cnode->definition && !cnode->alias)
4821 count++;
4823 streamer_write_uhwi (ob, count);
4825 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
4826 lsei_next_function_in_partition (&lsei))
4828 cgraph_node *cnode = lsei_cgraph_node (lsei);
4829 if (cnode->definition && !cnode->alias)
4831 class ipa_fn_summary *info = ipa_fn_summaries->get (cnode);
4832 class ipa_size_summary *size_info = ipa_size_summaries->get (cnode);
4833 struct bitpack_d bp;
4834 struct cgraph_edge *edge;
4835 int i;
4836 size_time_entry *e;
4837 struct condition *c;
4839 streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode));
4840 streamer_write_hwi (ob, size_info->estimated_self_stack_size);
4841 streamer_write_hwi (ob, size_info->self_size);
4842 info->time.stream_out (ob);
4843 bp = bitpack_create (ob->main_stream);
4844 bp_pack_value (&bp, info->inlinable, 1);
4845 bp_pack_value (&bp, info->fp_expressions, 1);
4846 streamer_write_bitpack (&bp);
4847 if (!lto_stream_offload_p)
4848 streamer_write_uhwi (ob, info->target_info);
4849 streamer_write_uhwi (ob, vec_safe_length (info->conds));
4850 for (i = 0; vec_safe_iterate (info->conds, i, &c); i++)
4852 int j;
4853 struct expr_eval_op *op;
4855 streamer_write_uhwi (ob, c->operand_num);
4856 streamer_write_uhwi (ob, c->code);
4857 stream_write_tree (ob, c->type, true);
4858 stream_write_tree (ob, c->val, true);
4859 bp = bitpack_create (ob->main_stream);
4860 bp_pack_value (&bp, c->agg_contents, 1);
4861 bp_pack_value (&bp, c->by_ref, 1);
4862 streamer_write_bitpack (&bp);
4863 if (c->agg_contents)
4864 streamer_write_uhwi (ob, c->offset);
4865 streamer_write_uhwi (ob, vec_safe_length (c->param_ops));
4866 for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++)
4868 streamer_write_uhwi (ob, op->code);
4869 stream_write_tree (ob, op->type, true);
4870 if (op->val[0])
4872 bp = bitpack_create (ob->main_stream);
4873 bp_pack_value (&bp, op->index, 2);
4874 streamer_write_bitpack (&bp);
4875 stream_write_tree (ob, op->val[0], true);
4876 if (op->val[1])
4877 stream_write_tree (ob, op->val[1], true);
4881 streamer_write_uhwi (ob, info->size_time_table.length ());
4882 for (i = 0; info->size_time_table.iterate (i, &e); i++)
4884 streamer_write_uhwi (ob, e->size);
4885 e->time.stream_out (ob);
4886 e->exec_predicate.stream_out (ob);
4887 e->nonconst_predicate.stream_out (ob);
4889 ipa_freqcounting_predicate *fcp;
4890 streamer_write_uhwi (ob, vec_safe_length (info->loop_iterations));
4891 for (i = 0; vec_safe_iterate (info->loop_iterations, i, &fcp); i++)
4893 fcp->predicate->stream_out (ob);
4894 fcp->freq.stream_out (ob);
4896 streamer_write_uhwi (ob, vec_safe_length (info->loop_strides));
4897 for (i = 0; vec_safe_iterate (info->loop_strides, i, &fcp); i++)
4899 fcp->predicate->stream_out (ob);
4900 fcp->freq.stream_out (ob);
4902 streamer_write_uhwi (ob, info->builtin_constant_p_parms.length ());
4903 int ip;
4904 for (i = 0; info->builtin_constant_p_parms.iterate (i, &ip);
4905 i++)
4906 streamer_write_uhwi (ob, ip);
4907 for (edge = cnode->callees; edge; edge = edge->next_callee)
4908 write_ipa_call_summary (ob, edge);
4909 for (edge = cnode->indirect_calls; edge; edge = edge->next_callee)
4910 write_ipa_call_summary (ob, edge);
4913 streamer_write_char_stream (ob->main_stream, 0);
4914 produce_asm (ob, NULL);
4915 destroy_output_block (ob);
4917 ipa_prop_write_jump_functions ();
4921 /* Release function summary. */
4923 void
4924 ipa_free_fn_summary (void)
4926 if (!ipa_call_summaries)
4927 return;
4928 ggc_delete (ipa_fn_summaries);
4929 ipa_fn_summaries = NULL;
4930 delete ipa_call_summaries;
4931 ipa_call_summaries = NULL;
4932 edge_predicate_pool.release ();
4933 /* During IPA this is one of largest datastructures to release. */
4934 if (flag_wpa)
4935 ggc_trim ();
4938 /* Release function summary. */
4940 void
4941 ipa_free_size_summary (void)
4943 if (!ipa_size_summaries)
4944 return;
4945 delete ipa_size_summaries;
4946 ipa_size_summaries = NULL;
4949 namespace {
4951 const pass_data pass_data_local_fn_summary =
4953 GIMPLE_PASS, /* type */
4954 "local-fnsummary", /* name */
4955 OPTGROUP_INLINE, /* optinfo_flags */
4956 TV_INLINE_PARAMETERS, /* tv_id */
4957 0, /* properties_required */
4958 0, /* properties_provided */
4959 0, /* properties_destroyed */
4960 0, /* todo_flags_start */
4961 0, /* todo_flags_finish */
4964 class pass_local_fn_summary : public gimple_opt_pass
4966 public:
4967 pass_local_fn_summary (gcc::context *ctxt)
4968 : gimple_opt_pass (pass_data_local_fn_summary, ctxt)
4971 /* opt_pass methods: */
4972 opt_pass * clone () final override
4974 return new pass_local_fn_summary (m_ctxt);
4976 unsigned int execute (function *) final override
4978 return compute_fn_summary_for_current ();
4981 }; // class pass_local_fn_summary
4983 } // anon namespace
4985 gimple_opt_pass *
4986 make_pass_local_fn_summary (gcc::context *ctxt)
4988 return new pass_local_fn_summary (ctxt);
4992 /* Free inline summary. */
4994 namespace {
4996 const pass_data pass_data_ipa_free_fn_summary =
4998 SIMPLE_IPA_PASS, /* type */
4999 "free-fnsummary", /* name */
5000 OPTGROUP_NONE, /* optinfo_flags */
5001 TV_IPA_FREE_INLINE_SUMMARY, /* tv_id */
5002 0, /* properties_required */
5003 0, /* properties_provided */
5004 0, /* properties_destroyed */
5005 0, /* todo_flags_start */
5006 0, /* todo_flags_finish */
5009 class pass_ipa_free_fn_summary : public simple_ipa_opt_pass
5011 public:
5012 pass_ipa_free_fn_summary (gcc::context *ctxt)
5013 : simple_ipa_opt_pass (pass_data_ipa_free_fn_summary, ctxt),
5014 small_p (false)
5017 /* opt_pass methods: */
5018 opt_pass *clone () final override
5020 return new pass_ipa_free_fn_summary (m_ctxt);
5022 void set_pass_param (unsigned int n, bool param) final override
5024 gcc_assert (n == 0);
5025 small_p = param;
5027 bool gate (function *) final override { return true; }
5028 unsigned int execute (function *) final override
5030 ipa_free_fn_summary ();
5031 /* Free ipa-prop structures if they are no longer needed. */
5032 ipa_free_all_structures_after_iinln ();
5033 if (!flag_wpa)
5034 ipa_free_size_summary ();
5035 return 0;
5038 private:
5039 bool small_p;
5040 }; // class pass_ipa_free_fn_summary
5042 } // anon namespace
5044 simple_ipa_opt_pass *
5045 make_pass_ipa_free_fn_summary (gcc::context *ctxt)
5047 return new pass_ipa_free_fn_summary (ctxt);
5050 namespace {
5052 const pass_data pass_data_ipa_fn_summary =
5054 IPA_PASS, /* type */
5055 "fnsummary", /* name */
5056 OPTGROUP_INLINE, /* optinfo_flags */
5057 TV_IPA_FNSUMMARY, /* tv_id */
5058 0, /* properties_required */
5059 0, /* properties_provided */
5060 0, /* properties_destroyed */
5061 0, /* todo_flags_start */
5062 ( TODO_dump_symtab ), /* todo_flags_finish */
5065 class pass_ipa_fn_summary : public ipa_opt_pass_d
5067 public:
5068 pass_ipa_fn_summary (gcc::context *ctxt)
5069 : ipa_opt_pass_d (pass_data_ipa_fn_summary, ctxt,
5070 ipa_fn_summary_generate, /* generate_summary */
5071 ipa_fn_summary_write, /* write_summary */
5072 ipa_fn_summary_read, /* read_summary */
5073 NULL, /* write_optimization_summary */
5074 NULL, /* read_optimization_summary */
5075 NULL, /* stmt_fixup */
5076 0, /* function_transform_todo_flags_start */
5077 NULL, /* function_transform */
5078 NULL) /* variable_transform */
5081 /* opt_pass methods: */
5082 unsigned int execute (function *) final override { return 0; }
5084 }; // class pass_ipa_fn_summary
5086 } // anon namespace
5088 ipa_opt_pass_d *
5089 make_pass_ipa_fn_summary (gcc::context *ctxt)
5091 return new pass_ipa_fn_summary (ctxt);
5094 /* Reset all state within ipa-fnsummary.cc so that we can rerun the compiler
5095 within the same process. For use by toplev::finalize. */
5097 void
5098 ipa_fnsummary_cc_finalize (void)
5100 ipa_free_fn_summary ();
5101 ipa_free_size_summary ();