// LoadPair fusion optimization pass for AArch64.
// Copyright (C) 2023-2025 Free Software Foundation, Inc.
//
// This file is part of GCC.
//
// GCC is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 3, or (at your option)
// any later version.
//
// GCC is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with GCC; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

#include "coretypes.h"
#include "tree-pass.h"
#include "insn-attr.h"
#include "pair-fusion.h"

33 static constexpr HOST_WIDE_INT LDP_IMM_BITS
= 7;
34 static constexpr HOST_WIDE_INT LDP_IMM_SIGN_BIT
= (1 << (LDP_IMM_BITS
- 1));
35 static constexpr HOST_WIDE_INT LDP_MAX_IMM
= LDP_IMM_SIGN_BIT
- 1;
36 static constexpr HOST_WIDE_INT LDP_MIN_IMM
= -LDP_MAX_IMM
- 1;
38 struct aarch64_pair_fusion
: public pair_fusion
40 bool fpsimd_op_p (rtx reg_op
, machine_mode mem_mode
,
41 bool load_p
) override final
43 // Before RA, we use the modes, noting that stores of constant zero
44 // operands use GPRs (even in non-integer modes). After RA, we use
45 // the hard register numbers.
46 return reload_completed
47 ? (REG_P (reg_op
) && FP_REGNUM_P (REGNO (reg_op
)))
48 : (GET_MODE_CLASS (mem_mode
) != MODE_INT
49 && (load_p
|| !aarch64_const_zero_rtx_p (reg_op
)));
52 bool pair_mem_insn_p (rtx_insn
*rti
, bool &load_p
) override final
;
54 bool pair_mem_ok_with_policy (rtx base_mem
, bool load_p
) override final
56 return aarch64_mem_ok_with_ldpstp_policy_model (base_mem
,
61 bool pair_operand_mode_ok_p (machine_mode mode
) override final
;
63 rtx
gen_pair (rtx
*pats
, rtx writeback
, bool load_p
) override final
;
65 bool pair_reg_operand_ok_p (bool load_p
, rtx reg_op
,
66 machine_mode mode
) override final
69 ? aarch64_ldp_reg_operand (reg_op
, mode
)
70 : aarch64_stp_reg_operand (reg_op
, mode
));
73 int pair_mem_alias_check_limit () override final
75 return aarch64_ldp_alias_check_limit
;
78 bool should_handle_writeback (writeback_type which
) override final
80 if (which
== writeback_type::ALL
)
81 return aarch64_ldp_writeback
> 1;
83 return aarch64_ldp_writeback
;
86 bool track_loads_p () override final
88 return aarch64_tune_params
.ldp_policy_model
89 != AARCH64_LDP_STP_POLICY_NEVER
;
92 bool track_stores_p () override final
94 return aarch64_tune_params
.stp_policy_model
95 != AARCH64_LDP_STP_POLICY_NEVER
;
98 bool pair_mem_in_range_p (HOST_WIDE_INT offset
) override final
100 return (offset
>= LDP_MIN_IMM
&& offset
<= LDP_MAX_IMM
);
103 rtx
gen_promote_writeback_pair (rtx wb_effect
, rtx mem
, rtx regs
[2],
104 bool load_p
) override final
;
106 rtx
destructure_pair (rtx regs
[2], rtx pattern
, bool load_p
) override final
;
110 aarch64_pair_fusion::pair_mem_insn_p (rtx_insn
*rti
, bool &load_p
)
112 rtx pat
= PATTERN (rti
);
113 if (GET_CODE (pat
) == PARALLEL
114 && XVECLEN (pat
, 0) == 2)
116 const auto attr
= get_attr_ldpstp (rti
);
117 if (attr
== LDPSTP_NONE
)
120 load_p
= (attr
== LDPSTP_LDP
);
121 gcc_checking_assert (load_p
|| attr
== LDPSTP_STP
);
128 aarch64_pair_fusion::gen_pair (rtx
*pats
, rtx writeback
, bool load_p
)
134 auto patvec
= gen_rtvec (3, writeback
, pats
[0], pats
[1]);
135 return gen_rtx_PARALLEL (VOIDmode
, patvec
);
138 return aarch64_gen_load_pair (XEXP (pats
[0], 0),
142 return aarch64_gen_store_pair (XEXP (pats
[0], 0),
148 // Return true if we should consider forming ldp/stp insns from memory
149 // accesses with operand mode MODE at this stage in compilation.
151 aarch64_pair_fusion::pair_operand_mode_ok_p (machine_mode mode
)
153 if (!aarch64_ldpstp_operand_mode_p (mode
))
156 // We don't pair up TImode accesses before RA because TImode is
157 // special in that it can be allocated to a pair of GPRs or a single
158 // FPR, and the RA is best placed to make that decision.
159 return reload_completed
|| mode
!= TImode
;
// Given a pair mode MODE, return a canonical mode to be used for a single
// operand of such a pair.  Currently we only use this when promoting a
// non-writeback pair into a writeback pair, as it isn't otherwise clear
// which mode to use when storing a modeless CONST_INT.
167 aarch64_operand_mode_for_pair_mode (machine_mode mode
)
182 // Given a load pair insn in PATTERN, unpack the insn, storing
183 // the registers in REGS and returning the mem.
185 aarch64_destructure_load_pair (rtx regs
[2], rtx pattern
)
189 for (int i
= 0; i
< 2; i
++)
191 rtx pat
= XVECEXP (pattern
, 0, i
);
192 regs
[i
] = XEXP (pat
, 0);
193 rtx unspec
= XEXP (pat
, 1);
194 gcc_checking_assert (GET_CODE (unspec
) == UNSPEC
);
195 rtx this_mem
= XVECEXP (unspec
, 0, 0);
197 gcc_checking_assert (rtx_equal_p (mem
, this_mem
));
200 gcc_checking_assert (MEM_P (this_mem
));
208 // Given a store pair insn in PATTERN, unpack the insn, storing
209 // the register operands in REGS, and returning the mem.
211 aarch64_destructure_store_pair (rtx regs
[2], rtx pattern
)
213 rtx mem
= XEXP (pattern
, 0);
214 rtx unspec
= XEXP (pattern
, 1);
215 gcc_checking_assert (GET_CODE (unspec
) == UNSPEC
);
216 for (int i
= 0; i
< 2; i
++)
217 regs
[i
] = XVECEXP (unspec
, 0, i
);
222 aarch64_pair_fusion::destructure_pair (rtx regs
[2], rtx pattern
, bool load_p
)
225 return aarch64_destructure_load_pair (regs
, pattern
);
227 return aarch64_destructure_store_pair (regs
, pattern
);
231 aarch64_pair_fusion::gen_promote_writeback_pair (rtx wb_effect
, rtx pair_mem
,
235 auto op_mode
= aarch64_operand_mode_for_pair_mode (GET_MODE (pair_mem
));
237 machine_mode modes
[2];
238 for (int i
= 0; i
< 2; i
++)
240 machine_mode mode
= GET_MODE (regs
[i
]);
242 gcc_checking_assert (mode
!= VOIDmode
);
243 else if (mode
== VOIDmode
)
249 const auto op_size
= GET_MODE_SIZE (modes
[0]);
250 gcc_checking_assert (known_eq (op_size
, GET_MODE_SIZE (modes
[1])));
253 for (int i
= 0; i
< 2; i
++)
255 rtx mem
= adjust_address_nv (pair_mem
, modes
[i
], op_size
* i
);
257 ? gen_rtx_SET (regs
[i
], mem
)
258 : gen_rtx_SET (mem
, regs
[i
]);
261 return gen_rtx_PARALLEL (VOIDmode
,
262 gen_rtvec (3, wb_effect
, pats
[0], pats
[1]));
267 const pass_data pass_data_ldp_fusion
=
270 "ldp_fusion", /* name */
271 OPTGROUP_NONE
, /* optinfo_flags */
273 0, /* properties_required */
274 0, /* properties_provided */
275 0, /* properties_destroyed */
276 0, /* todo_flags_start */
277 TODO_df_finish
, /* todo_flags_finish */
280 class pass_ldp_fusion
: public rtl_opt_pass
283 pass_ldp_fusion (gcc::context
*ctx
)
284 : rtl_opt_pass (pass_data_ldp_fusion
, ctx
)
287 opt_pass
*clone () override
{ return new pass_ldp_fusion (m_ctxt
); }
289 bool gate (function
*) final override
291 if (!optimize
|| optimize_debug
)
294 if (reload_completed
)
295 return flag_aarch64_late_ldp_fusion
;
297 return flag_aarch64_early_ldp_fusion
;
300 unsigned execute (function
*) final override
302 aarch64_pair_fusion pass
;
311 make_pass_ldp_fusion (gcc::context
*ctx
)
313 return new pass_ldp_fusion (ctx
);