1 // Late-stage instruction combination pass.
2 // Copyright (C) 2023-2024 Free Software Foundation, Inc.
4 // This file is part of GCC.
6 // GCC is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation; either version 3, or (at your option) any later
11 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 // You should have received a copy of the GNU General Public License
17 // along with GCC; see the file COPYING3. If not see
18 // <http://www.gnu.org/licenses/>.
20 // The current purpose of this pass is to substitute definitions into
21 // all uses, so that the definition can be removed. However, it could
22 // be extended to handle other combination-related optimizations in future.
24 // The pass can run before or after register allocation. When running
25 // before register allocation, it tries to avoid cases that are likely
26 // to increase register pressure. For the same reason, it avoids moving
27 // instructions around, even if doing so would allow an optimization to
28 // succeed. These limitations are removed when running after register
31 #define INCLUDE_ALGORITHM
32 #define INCLUDE_FUNCTIONAL
36 #include "coretypes.h"
41 #include "print-rtl.h"
42 #include "tree-pass.h"
43 #include "cfgcleanup.h"
47 using namespace rtl_ssa
;
50 const pass_data pass_data_late_combine
=
53 "late_combine", // name
54 OPTGROUP_NONE
, // optinfo_flags
56 0, // properties_required
57 0, // properties_provided
58 0, // properties_destroyed
59 0, // todo_flags_start
60 TODO_df_finish
, // todo_flags_finish
63 // Represents an attempt to substitute a single-set definition into all
64 // uses of the definition.
65 class insn_combination
68 insn_combination (set_info
*, rtx
, rtx
);
70 array_slice
<insn_change
*const> use_changes () const;
73 use_array
get_new_uses (use_info
*);
74 bool substitute_nondebug_use (use_info
*);
75 bool substitute_nondebug_uses (set_info
*);
76 bool try_to_preserve_debug_info (insn_change
&, use_info
*);
77 void substitute_debug_use (use_info
*);
78 bool substitute_note (insn_info
*, rtx
, bool);
79 void substitute_notes (insn_info
*, bool);
80 void substitute_note_uses (use_info
*);
81 void substitute_optional_uses (set_info
*);
83 // Represents the state of the function's RTL at the start of this
84 // combination attempt.
85 insn_change_watermark m_rtl_watermark
;
87 // Represents the rtl-ssa state at the start of this combination attempt.
88 obstack_watermark m_attempt
;
90 // The instruction that contains the definition, and that we're trying
92 insn_info
*m_def_insn
;
94 // The definition itself.
97 // The destination and source of the single set that defines m_def.
98 // The destination is known to be a plain REG.
102 // Contains the full list of changes that we want to make, in reverse
104 auto_vec
<insn_change
*> m_nondebug_changes
;
107 // Class that represents one run of the pass.
111 unsigned int execute (function
*);
114 rtx
optimizable_set (insn_info
*);
115 bool check_register_pressure (insn_info
*, rtx
);
116 bool check_uses (set_info
*, rtx
);
117 bool combine_into_uses (insn_info
*, insn_info
*);
119 auto_vec
<insn_info
*> m_worklist
;
122 insn_combination::insn_combination (set_info
*def
, rtx dest
, rtx src
)
123 : m_rtl_watermark (),
124 m_attempt (crtl
->ssa
->new_change_attempt ()),
125 m_def_insn (def
->insn ()),
129 m_nondebug_changes ()
133 array_slice
<insn_change
*const>
134 insn_combination::use_changes () const
136 return { m_nondebug_changes
.address () + 1,
137 m_nondebug_changes
.length () - 1 };
140 // USE is a direct or indirect use of m_def. Return the list of uses
141 // that would be needed after substituting m_def into the instruction.
142 // The returned list is marked as invalid if USE's insn and m_def_insn
143 // use different definitions for the same resource (register or memory).
145 insn_combination::get_new_uses (use_info
*use
)
147 auto *def
= use
->def ();
148 auto *use_insn
= use
->insn ();
150 use_array new_uses
= use_insn
->uses ();
151 new_uses
= remove_uses_of_def (m_attempt
, new_uses
, def
);
152 new_uses
= merge_access_arrays (m_attempt
, m_def_insn
->uses (), new_uses
);
153 if (new_uses
.is_valid () && use
->ebb () != m_def
->ebb ())
154 new_uses
= crtl
->ssa
->make_uses_available (m_attempt
, new_uses
, use
->bb (),
155 use_insn
->is_debug_insn ());
159 // Start the process of trying to replace USE by substitution, given that
160 // USE occurs in a non-debug instruction. Check:
162 // - that the substitution can be represented in RTL
164 // - that each use of a resource (register or memory) within the new
165 // instruction has a consistent definition
167 // - that the new instruction is a recognized pattern
169 // - that the instruction can be placed somewhere that makes all definitions
170 // and uses valid, and that permits any new hard-register clobbers added
171 // during the recognition process
173 // Return true on success.
175 insn_combination::substitute_nondebug_use (use_info
*use
)
177 insn_info
*use_insn
= use
->insn ();
178 rtx_insn
*use_rtl
= use_insn
->rtl ();
180 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
181 dump_insn_slim (dump_file
, use
->insn ()->rtl ());
183 // Reject second and subsequent uses if the target does not allow
184 // the defining instruction to be copied.
185 if (targetm
.cannot_copy_insn_p
186 && m_nondebug_changes
.length () >= 2
187 && targetm
.cannot_copy_insn_p (m_def_insn
->rtl ()))
189 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
190 fprintf (dump_file
, "-- The target does not allow multiple"
191 " copies of insn %d\n", m_def_insn
->uid ());
195 // Check that we can change the instruction pattern. Leave recognition
196 // of the result till later.
197 insn_propagation
prop (use_rtl
, m_dest
, m_src
);
198 if (!prop
.apply_to_pattern (&PATTERN (use_rtl
))
199 || prop
.num_replacements
== 0)
201 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
202 fprintf (dump_file
, "-- RTL substitution failed\n");
206 use_array new_uses
= get_new_uses (use
);
207 if (!new_uses
.is_valid ())
209 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
210 fprintf (dump_file
, "-- could not prove that all sources"
215 // Create a tentative change for the use.
216 auto *where
= XOBNEW (m_attempt
, insn_change
);
217 auto *use_change
= new (where
) insn_change (use_insn
);
218 m_nondebug_changes
.safe_push (use_change
);
219 use_change
->new_uses
= new_uses
;
221 struct local_ignore
: ignore_nothing
223 local_ignore (const set_info
*def
, const insn_info
*use_insn
)
224 : m_def (def
), m_use_insn (use_insn
) {}
226 // We don't limit the number of insns per optimization, so ignoring all
227 // insns for all insns would lead to quadratic complexity. Just ignore
228 // the use and definition, which should be enough for most purposes.
230 should_ignore_insn (const insn_info
*insn
)
232 return insn
== m_def
->insn () || insn
== m_use_insn
;
235 // Ignore the definition that we're removing, and all uses of it.
236 bool should_ignore_def (const def_info
*def
) { return def
== m_def
; }
238 const set_info
*m_def
;
239 const insn_info
*m_use_insn
;
242 auto ignore
= local_ignore (m_def
, use_insn
);
244 // Moving instructions before register allocation could increase
245 // register pressure. Only try moving them after RA.
246 if (reload_completed
&& can_move_insn_p (use_insn
))
247 use_change
->move_range
= { use_insn
->bb ()->head_insn (),
248 use_insn
->ebb ()->last_bb ()->end_insn () };
249 if (!restrict_movement (*use_change
, ignore
))
251 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
252 fprintf (dump_file
, "-- cannot satisfy all definitions and uses"
253 " in insn %d\n", INSN_UID (use_insn
->rtl ()));
257 if (!recog (m_attempt
, *use_change
, ignore
))
263 // Apply substitute_nondebug_use to all direct and indirect uses of DEF.
264 // There will be at most one level of indirection.
266 insn_combination::substitute_nondebug_uses (set_info
*def
)
268 for (use_info
*use
: def
->nondebug_insn_uses ())
269 if (!use
->is_live_out_use ()
270 && !use
->only_occurs_in_notes ()
271 && !substitute_nondebug_use (use
))
274 for (use_info
*use
: def
->phi_uses ())
275 if (!substitute_nondebug_uses (use
->phi ()))
281 // USE_CHANGE.insn () is a debug instruction that uses m_def. Try to
282 // substitute the definition into the instruction and try to describe
283 // the result in USE_CHANGE. Return true on success. Failure means that
284 // the instruction must be reset instead.
286 insn_combination::try_to_preserve_debug_info (insn_change
&use_change
,
289 // Punt on unsimplified subregs of hard registers. In that case,
290 // propagation can succeed and create a wider reg than the one we
292 if (HARD_REGISTER_NUM_P (use
->regno ())
293 && use
->includes_subregs ())
296 insn_info
*use_insn
= use_change
.insn ();
297 rtx_insn
*use_rtl
= use_insn
->rtl ();
299 use_change
.new_uses
= get_new_uses (use
);
300 if (!use_change
.new_uses
.is_valid ()
301 || !restrict_movement (use_change
))
304 insn_propagation
prop (use_rtl
, m_dest
, m_src
);
305 return prop
.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl
));
308 // USE_INSN is a debug instruction that uses m_def. Update it to reflect
309 // the fact that m_def is going to disappear. Try to preserve the source
310 // value if possible, but reset the instruction if not.
312 insn_combination::substitute_debug_use (use_info
*use
)
314 auto *use_insn
= use
->insn ();
315 rtx_insn
*use_rtl
= use_insn
->rtl ();
317 auto use_change
= insn_change (use_insn
);
318 if (!try_to_preserve_debug_info (use_change
, use
))
320 use_change
.new_uses
= {};
321 use_change
.move_range
= use_change
.insn ();
322 INSN_VAR_LOCATION_LOC (use_rtl
) = gen_rtx_UNKNOWN_VAR_LOC ();
324 insn_change
*changes
[] = { &use_change
};
325 crtl
->ssa
->change_insns (changes
);
328 // NOTE is a reg note of USE_INSN, which previously used m_def. Update
329 // the note to reflect the fact that m_def is going to disappear. Return
330 // true on success, or false if the note must be deleted.
332 // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
334 insn_combination::substitute_note (insn_info
*use_insn
, rtx note
,
337 if (REG_NOTE_KIND (note
) == REG_EQUAL
338 || REG_NOTE_KIND (note
) == REG_EQUIV
)
340 insn_propagation
prop (use_insn
->rtl (), m_dest
, m_src
);
341 return (prop
.apply_to_note (&XEXP (note
, 0))
342 && (can_propagate
|| prop
.num_replacements
== 0));
347 // Update USE_INSN's notes after deciding to go ahead with the optimization.
348 // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
350 insn_combination::substitute_notes (insn_info
*use_insn
, bool can_propagate
)
352 rtx_insn
*use_rtl
= use_insn
->rtl ();
353 rtx
*ptr
= ®_NOTES (use_rtl
);
354 while (rtx note
= *ptr
)
356 if (substitute_note (use_insn
, note
, can_propagate
))
357 ptr
= &XEXP (note
, 1);
359 *ptr
= XEXP (note
, 1);
363 // We've decided to go ahead with the substitution. Update all REG_NOTES
366 insn_combination::substitute_note_uses (use_info
*use
)
368 insn_info
*use_insn
= use
->insn ();
370 bool can_propagate
= true;
371 if (use
->only_occurs_in_notes ())
373 // The only uses are in notes. Try to keep the note if we can,
374 // but removing it is better than aborting the optimization.
375 insn_change
use_change (use_insn
);
376 use_change
.new_uses
= get_new_uses (use
);
377 if (!use_change
.new_uses
.is_valid ()
378 || !restrict_movement (use_change
))
380 use_change
.move_range
= use_insn
;
381 use_change
.new_uses
= remove_uses_of_def (m_attempt
,
384 can_propagate
= false;
386 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
388 fprintf (dump_file
, "%s notes in:\n",
389 can_propagate
? "updating" : "removing");
390 dump_insn_slim (dump_file
, use_insn
->rtl ());
392 substitute_notes (use_insn
, can_propagate
);
393 insn_change
*changes
[] = { &use_change
};
394 crtl
->ssa
->change_insns (changes
);
397 // We've already decided to update the insn's pattern and know that m_src
398 // will be available at the insn's new location. Now update its notes.
399 substitute_notes (use_insn
, can_propagate
);
402 // We've decided to go ahead with the substitution and we've dealt with
403 // all uses that occur in the patterns of non-debug insns. Update all
404 // other uses for the fact that m_def is about to disappear.
406 insn_combination::substitute_optional_uses (set_info
*def
)
408 if (auto insn_uses
= def
->all_insn_uses ())
410 use_info
*use
= *insn_uses
.begin ();
413 use_info
*next_use
= use
->next_any_insn_use ();
414 if (use
->is_in_debug_insn ())
415 substitute_debug_use (use
);
416 else if (!use
->is_live_out_use ())
417 substitute_note_uses (use
);
421 for (use_info
*use
: def
->phi_uses ())
422 substitute_optional_uses (use
->phi ());
425 // Try to perform the substitution. Return true on success.
427 insn_combination::run ()
429 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
431 fprintf (dump_file
, "\ntrying to combine definition of r%d in:\n",
433 dump_insn_slim (dump_file
, m_def_insn
->rtl ());
434 fprintf (dump_file
, "into:\n");
437 auto def_change
= insn_change::delete_insn (m_def_insn
);
438 m_nondebug_changes
.safe_push (&def_change
);
440 if (!substitute_nondebug_uses (m_def
)
441 || !changes_are_worthwhile (m_nondebug_changes
)
442 || !crtl
->ssa
->verify_insn_changes (m_nondebug_changes
))
445 // We've now decided that the optimization is valid and profitable.
446 // Allow it to be suppressed for bisection purposes.
447 if (!dbg_cnt (::late_combine
))
450 substitute_optional_uses (m_def
);
452 confirm_change_group ();
453 crtl
->ssa
->change_insns (m_nondebug_changes
);
457 // See whether INSN is a single_set that we can optimize. Return the
458 // set if so, otherwise return null.
460 late_combine::optimizable_set (insn_info
*insn
)
462 if (!insn
->can_be_optimized ()
465 || insn
->has_volatile_refs ()
466 || insn
->has_pre_post_modify ()
467 || !can_move_insn_p (insn
))
470 return single_set (insn
->rtl ());
473 // Suppose that we can replace all uses of SET_DEST (SET) with SET_SRC (SET),
474 // where SET occurs in INSN. Return true if doing so is not likely to
475 // increase register pressure.
477 late_combine::check_register_pressure (insn_info
*insn
, rtx set
)
479 // Plain register-to-register moves do not establish a register class
480 // preference and have no well-defined effect on the register allocator.
481 // If changes in register class are needed, the register allocator is
482 // in the best position to place those changes. If no change in
483 // register class is needed, then the optimization reduces register
484 // pressure if SET_SRC (set) was already live at uses, otherwise the
485 // optimization is pressure-neutral.
486 rtx src
= SET_SRC (set
);
490 // On the same basis, substituting a SET_SRC that contains a single
491 // pseudo register either reduces pressure or is pressure-neutral,
492 // subject to the constraints below. We would need to do more
493 // analysis for SET_SRCs that use more than one pseudo register.
494 unsigned int nregs
= 0;
495 for (auto *use
: insn
->uses ())
497 && !HARD_REGISTER_NUM_P (use
->regno ())
498 && !use
->only_occurs_in_notes ())
502 // If there are no pseudo registers in SET_SRC then the optimization
503 // should improve register pressure.
507 // We'd be substituting (set (reg R1) SRC) where SRC is known to
508 // contain a single pseudo register R2. Assume for simplicity that
509 // each new use of R2 would need to be in the same class C as the
510 // current use of R2. If, for a realistic allocation, C is a
511 // non-strict superset of the R1's register class, the effect on
512 // register pressure should be positive or neutral. If instead
513 // R1 occupies a different register class from R2, or if R1 has
514 // more allocation freedom than R2, then there's a higher risk that
515 // the effect on register pressure could be negative.
517 // First use constrain_operands to get the most likely choice of
518 // alternative. For simplicity, just handle the case where the
519 // output operand is operand 0.
520 extract_insn (insn
->rtl ());
521 rtx dest
= SET_DEST (set
);
522 if (recog_data
.n_operands
== 0
523 || recog_data
.operand
[0] != dest
)
526 if (!constrain_operands (0, get_enabled_alternatives (insn
->rtl ())))
529 preprocess_constraints (insn
->rtl ());
530 auto *alt
= which_op_alt ();
531 auto dest_class
= alt
[0].cl
;
533 // Check operands 1 and above.
534 auto check_src
= [&] (unsigned int i
)
536 if (recog_data
.is_operator
[i
])
539 rtx op
= recog_data
.operand
[i
];
544 op
= SUBREG_REG (op
);
547 // Ignore hard registers. We've already rejected uses of non-fixed
548 // hard registers in the SET_SRC.
549 if (HARD_REGISTER_P (op
))
552 // Make sure that the source operand's class is at least as
553 // permissive as the destination operand's class.
554 auto src_class
= alternative_class (alt
, i
);
555 if (!reg_class_subset_p (dest_class
, src_class
))
558 // Make sure that the source operand occupies no more hard
559 // registers than the destination operand. This mostly matters
561 if (targetm
.class_max_nregs (dest_class
, GET_MODE (dest
))
562 < targetm
.class_max_nregs (src_class
, GET_MODE (op
)))
569 for (int i
= 1; i
< recog_data
.n_operands
; ++i
)
570 if (recog_data
.operand_type
[i
] != OP_OUT
&& !check_src (i
))
576 // Check uses of DEF to see whether there is anything obvious that
577 // prevents the substitution of SET into uses of DEF.
579 late_combine::check_uses (set_info
*def
, rtx set
)
581 use_info
*prev_use
= nullptr;
582 for (use_info
*use
: def
->nondebug_insn_uses ())
584 insn_info
*use_insn
= use
->insn ();
586 if (use
->is_live_out_use ())
588 if (use
->only_occurs_in_notes ())
591 // We cannot replace all uses if the value is live on exit.
592 if (use
->is_artificial ())
595 // Avoid increasing the complexity of instructions that
596 // reference allocatable hard registers.
597 if (!REG_P (SET_SRC (set
))
599 && (accesses_include_nonfixed_hard_registers (use_insn
->uses ())
600 || accesses_include_nonfixed_hard_registers (use_insn
->defs ())))
603 // Don't substitute into a non-local goto, since it can then be
604 // treated as a jump to local label, e.g. in shorten_branches.
605 // ??? But this shouldn't be necessary.
606 if (use_insn
->is_jump ()
607 && find_reg_note (use_insn
->rtl (), REG_NON_LOCAL_GOTO
, NULL_RTX
))
610 // Reject cases where one of the uses is a function argument.
611 // The combine attempt should fail anyway, but this is a common
612 // case that is easy to check early.
613 if (use_insn
->is_call ()
614 && HARD_REGISTER_P (SET_DEST (set
))
615 && find_reg_fusage (use_insn
->rtl (), USE
, SET_DEST (set
)))
618 // We'll keep the uses in their original order, even if we move
619 // them relative to other instructions. Make sure that non-final
620 // uses do not change any values that occur in the SET_SRC.
621 if (prev_use
&& prev_use
->ebb () == use
->ebb ())
623 def_info
*ultimate_def
= look_through_degenerate_phi (def
);
624 if (insn_clobbers_resources (prev_use
->insn (),
625 ultimate_def
->insn ()->uses ()))
632 for (use_info
*use
: def
->phi_uses ())
633 if (!use
->phi ()->is_degenerate ()
634 || !check_uses (use
->phi (), set
))
640 // Try to remove INSN by substituting a definition into all uses.
641 // If the optimization moves any instructions before CURSOR, add those
642 // instructions to the end of m_worklist.
644 late_combine::combine_into_uses (insn_info
*insn
, insn_info
*cursor
)
646 // For simplicity, don't try to handle sets of multiple hard registers.
647 // And for correctness, don't remove any assignments to the stack or
648 // frame pointers, since that would implicitly change the set of valid
649 // memory locations between this assignment and the next.
651 // Removing assignments to the hard frame pointer would invalidate
653 set_info
*def
= single_set_info (insn
);
656 || def
->regno () == STACK_POINTER_REGNUM
657 || def
->regno () == FRAME_POINTER_REGNUM
658 || def
->regno () == HARD_FRAME_POINTER_REGNUM
)
661 rtx set
= optimizable_set (insn
);
665 // For simplicity, don't try to handle subreg destinations.
666 rtx dest
= SET_DEST (set
);
667 if (!REG_P (dest
) || def
->regno () != REGNO (dest
))
670 // Don't prolong the live ranges of allocatable hard registers, or put
671 // them into more complicated instructions. Failing to prevent this
672 // could lead to spill failures, or at least to worst register allocation.
673 if (!reload_completed
674 && accesses_include_nonfixed_hard_registers (insn
->uses ()))
677 if (!reload_completed
&& !check_register_pressure (insn
, set
))
680 if (!check_uses (def
, set
))
683 insn_combination
combination (def
, SET_DEST (set
), SET_SRC (set
));
684 if (!combination
.run ())
687 for (auto *use_change
: combination
.use_changes ())
688 if (*use_change
->insn () < *cursor
)
689 m_worklist
.safe_push (use_change
->insn ());
695 // Run the pass on function FN.
697 late_combine::execute (function
*fn
)
700 calculate_dominance_info (CDI_DOMINATORS
);
702 crtl
->ssa
= new rtl_ssa::function_info (fn
);
703 // Don't allow memory_operand to match volatile MEMs.
704 init_recog_no_volatile ();
706 insn_info
*insn
= *crtl
->ssa
->nondebug_insns ().begin ();
709 if (!insn
->is_artificial ())
711 insn_info
*prev
= insn
->prev_nondebug_insn ();
712 if (combine_into_uses (insn
, prev
))
714 // Any instructions that get added to the worklist were
715 // previously after PREV. Thus if we were able to move
716 // an instruction X before PREV during one combination,
717 // X cannot depend on any instructions that we move before
718 // PREV during subsequent combinations. This means that
719 // the worklist should be free of backwards dependencies,
720 // even if it isn't necessarily in RPO.
721 for (unsigned int i
= 0; i
< m_worklist
.length (); ++i
)
722 combine_into_uses (m_worklist
[i
], prev
);
723 m_worklist
.truncate (0);
727 insn
= insn
->next_nondebug_insn ();
731 if (crtl
->ssa
->perform_pending_updates ())
733 // Make the recognizer allow volatile MEMs again.
735 free_dominance_info (CDI_DOMINATORS
);
739 class pass_late_combine
: public rtl_opt_pass
742 pass_late_combine (gcc::context
*ctxt
)
743 : rtl_opt_pass (pass_data_late_combine
, ctxt
)
747 opt_pass
*clone () override
{ return new pass_late_combine (m_ctxt
); }
748 bool gate (function
*) override
;
749 unsigned int execute (function
*) override
;
753 pass_late_combine::gate (function
*)
755 return optimize
> 0 && flag_late_combine_instructions
;
759 pass_late_combine::execute (function
*fn
)
761 return late_combine ().execute (fn
);
766 // Create a new CC fusion pass instance.
769 make_pass_late_combine (gcc::context
*ctxt
)
771 return new pass_late_combine (ctxt
);