Daily bump.
[official-gcc.git] / gcc / config / aarch64 / aarch64-cc-fusion.cc
blobcea54dee298bb4d090fdd20de8711471b455edbe
1 // Pass to fuse CC operations with other instructions.
2 // Copyright (C) 2021-2025 Free Software Foundation, Inc.
3 //
4 // This file is part of GCC.
5 //
6 // GCC is free software; you can redistribute it and/or modify it under
7 // the terms of the GNU General Public License as published by the Free
8 // Software Foundation; either version 3, or (at your option) any later
9 // version.
11 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 // for more details.
16 // You should have received a copy of the GNU General Public License
17 // along with GCC; see the file COPYING3. If not see
18 // <http://www.gnu.org/licenses/>.
20 // This pass looks for sequences of the form:
22 // A: (set (reg R1) X1)
23 // B: ...instructions that might change the value of X1...
24 // C: (set (reg CC) X2) // X2 uses R1
26 // and tries to change them to:
28 // C': [(set (reg CC) X2')
29 // (set (reg R1) X1)]
30 // B: ...instructions that might change the value of X1...
32 // where X2' is the result of replacing R1 with X1 in X2.
34 // This sequence occurs in SVE code in two important cases:
36 // (a) Sometimes, to deal correctly with overflow, we need to increment
37 // an IV after a WHILELO rather than before it. In this case:
38 // - A is a WHILELO,
39 // - B includes an IV increment and
40 // - C is a separate PTEST.
42 // (b) ACLE code of the form:
44 // svbool_t ok = svrdffr ();
45 // if (svptest_last (pg, ok))
46 // ...
48 // must, for performance reasons, be code-generated as:
50 // RDFFRS Pok.B, Pg/Z
51 // ...branch on flags result...
53 // without a separate PTEST of Pok. In this case:
54 // - A is an aarch64_rdffr
55 // - B includes an aarch64_update_ffrt
56 // - C is a separate PTEST
58 // Combine can handle this optimization if B doesn't exist and if A and
59 // C are in the same BB. This pass instead handles cases where B does
60 // exist and cases where A and C are in different BBs of the same EBB.
62 #define IN_TARGET_CODE 1
64 #define INCLUDE_ALGORITHM
65 #define INCLUDE_FUNCTIONAL
66 #define INCLUDE_ARRAY
67 #include "config.h"
68 #include "system.h"
69 #include "coretypes.h"
70 #include "backend.h"
71 #include "rtl.h"
72 #include "df.h"
73 #include "rtl-ssa.h"
74 #include "tree-pass.h"
76 using namespace rtl_ssa;
78 namespace {
79 const pass_data pass_data_cc_fusion =
81 RTL_PASS, // type
82 "cc_fusion", // name
83 OPTGROUP_NONE, // optinfo_flags
84 TV_NONE, // tv_id
85 0, // properties_required
86 0, // properties_provided
87 0, // properties_destroyed
88 0, // todo_flags_start
89 TODO_df_finish, // todo_flags_finish
92 // Class that represents one run of the pass.
93 class cc_fusion
95 public:
96 cc_fusion () : m_parallel () {}
97 void execute ();
99 private:
100 rtx optimizable_set (const insn_info *);
101 bool parallelize_insns (def_info *, rtx, def_info *, rtx);
102 void optimize_cc_setter (def_info *, rtx);
104 // A spare PARALLEL rtx, or null if none.
105 rtx m_parallel;
108 // See whether INSN is a single_set that we can optimize. Return the
109 // set if so, otherwise return null.
111 cc_fusion::optimizable_set (const insn_info *insn)
113 if (!insn->can_be_optimized ()
114 || insn->is_asm ()
115 || insn->has_volatile_refs ()
116 || insn->has_pre_post_modify ())
117 return NULL_RTX;
119 return single_set (insn->rtl ());
122 // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
123 // a single_set that sets (only) OTHER_DEF. CC_SET is known to set the
124 // CC register and the instruction that contains CC_SET is known to use
125 // OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel.
126 bool
127 cc_fusion::parallelize_insns (def_info *cc_def, rtx cc_set,
128 def_info *other_def, rtx other_set)
130 auto attempt = crtl->ssa->new_change_attempt ();
132 insn_info *cc_insn = cc_def->insn ();
133 insn_info *other_insn = other_def->insn ();
134 if (dump_file && (dump_flags & TDF_DETAILS))
135 fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
136 other_insn->uid (), cc_insn->uid ());
138 // Try to substitute OTHER_SET into CC_INSN.
139 insn_change_watermark rtl_watermark;
140 rtx_insn *cc_rtl = cc_insn->rtl ();
141 insn_propagation prop (cc_rtl, SET_DEST (other_set),
142 SET_SRC (other_set));
143 if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
144 || prop.num_replacements == 0)
146 if (dump_file && (dump_flags & TDF_DETAILS))
147 fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
148 other_def->regno ());
149 return false;
152 // Restrict the uses to those outside notes.
153 use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
154 use_array other_set_uses = remove_note_accesses (attempt,
155 other_insn->uses ());
157 // Remove the use of the substituted value.
158 access_array_builder uses_builder (attempt);
159 uses_builder.reserve (cc_uses.size ());
160 for (use_info *use : cc_uses)
161 if (use->def () != other_def)
162 uses_builder.quick_push (use);
163 cc_uses = use_array (uses_builder.finish ());
165 // Get the list of uses for the new instruction.
166 insn_change cc_change (cc_insn);
167 cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
168 if (!cc_change.new_uses.is_valid ())
170 if (dump_file && (dump_flags & TDF_DETAILS))
171 fprintf (dump_file, "-- cannot merge uses\n");
172 return false;
175 // The instruction initially defines just two registers. recog can add
176 // extra clobbers if necessary.
177 auto_vec<access_info *, 2> new_defs;
178 new_defs.quick_push (cc_def);
179 new_defs.quick_push (other_def);
180 sort_accesses (new_defs);
181 cc_change.new_defs = def_array (access_array (new_defs));
183 // Make sure there is somewhere that the new instruction could live.
184 auto other_change = insn_change::delete_insn (other_insn);
185 insn_change *changes[] = { &other_change, &cc_change };
186 cc_change.move_range = cc_insn->ebb ()->insn_range ();
187 if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
189 if (dump_file && (dump_flags & TDF_DETAILS))
190 fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
191 return false;
194 // Tentatively install the new pattern. By convention, the CC set
195 // must be first.
196 if (m_parallel)
198 XVECEXP (m_parallel, 0, 0) = cc_set;
199 XVECEXP (m_parallel, 0, 1) = other_set;
201 else
203 rtvec vec = gen_rtvec (2, cc_set, other_set);
204 m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
206 validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);
208 // These routines report failures themselves.
209 if (!recog (attempt, cc_change, ignore_changing_insns (changes))
210 || !changes_are_worthwhile (changes)
211 || !crtl->ssa->verify_insn_changes (changes))
212 return false;
214 remove_reg_equal_equiv_notes (cc_rtl);
215 confirm_change_group ();
216 crtl->ssa->change_insns (changes);
217 m_parallel = NULL_RTX;
218 return true;
221 // Try to optimize the instruction that contains CC_DEF, where CC_DEF describes
222 // a definition of the CC register by CC_SET.
223 void
224 cc_fusion::optimize_cc_setter (def_info *cc_def, rtx cc_set)
226 // Search the registers used by the CC setter for an easily-substitutable
227 // def-use chain.
228 for (use_info *other_use : cc_def->insn ()->uses ())
229 if (def_info *other_def = other_use->def ())
230 if (other_use->regno () != CC_REGNUM
231 && other_def->ebb () == cc_def->ebb ())
232 if (rtx other_set = optimizable_set (other_def->insn ()))
234 rtx dest = SET_DEST (other_set);
235 if (REG_P (dest)
236 && REGNO (dest) == other_def->regno ()
237 && REG_NREGS (dest) == 1
238 && parallelize_insns (cc_def, cc_set, other_def, other_set))
239 return;
243 // Run the pass on the current function.
244 void
245 cc_fusion::execute ()
247 // Initialization.
248 calculate_dominance_info (CDI_DOMINATORS);
249 df_analyze ();
250 crtl->ssa = new rtl_ssa::function_info (cfun);
252 // Walk through all instructions that set CC. Look for a PTEST instruction
253 // that we can optimize.
255 // ??? The PTEST test isn't needed for correctness, but it ensures that the
256 // pass no effect on non-SVE code.
257 for (def_info *def : crtl->ssa->reg_defs (CC_REGNUM))
258 if (rtx cc_set = optimizable_set (def->insn ()))
259 if (REG_P (SET_DEST (cc_set))
260 && REGNO (SET_DEST (cc_set)) == CC_REGNUM
261 && GET_CODE (SET_SRC (cc_set)) == UNSPEC
262 && XINT (SET_SRC (cc_set), 1) == UNSPEC_PTEST)
263 optimize_cc_setter (def, cc_set);
265 // Finalization.
266 crtl->ssa->perform_pending_updates ();
267 free_dominance_info (CDI_DOMINATORS);
270 class pass_cc_fusion : public rtl_opt_pass
272 public:
273 pass_cc_fusion (gcc::context *ctxt)
274 : rtl_opt_pass (pass_data_cc_fusion, ctxt)
277 // opt_pass methods:
278 virtual bool gate (function *) { return TARGET_SVE && optimize >= 2; }
279 virtual unsigned int execute (function *);
282 unsigned int
283 pass_cc_fusion::execute (function *)
285 cc_fusion ().execute ();
286 return 0;
289 } // end namespace
291 // Create a new CC fusion pass instance.
293 rtl_opt_pass *
294 make_pass_cc_fusion (gcc::context *ctxt)
296 return new pass_cc_fusion (ctxt);