1 /* ACLE support for AArch64 SME.
2 Copyright (C) 2023-2025 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 GCC is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
28 #include "insn-codes.h"
32 #include "basic-block.h"
34 #include "fold-const.h"
36 #include "gimple-iterator.h"
40 #include "aarch64-sve-builtins.h"
41 #include "aarch64-sve-builtins-shapes.h"
42 #include "aarch64-sve-builtins-base.h"
43 #include "aarch64-sve-builtins-sme.h"
44 #include "aarch64-sve-builtins-functions.h"
46 using namespace aarch64_sve
;
50 class load_store_za_zt0_base
: public function_base
54 memory_scalar_type (const function_instance
&) const override
56 return void_type_node
;
60 class read_write_za_base
: public function_base
63 constexpr read_write_za_base (int unspec
) : m_unspec (unspec
) {}
66 expand (function_expander
&e
) const override
68 auto za_mode
= e
.vector_mode (0);
69 auto z_mode
= e
.tuple_mode (1);
70 auto icode
= (za_mode
== VNx1TImode
71 ? code_for_aarch64_sme (m_unspec
, za_mode
, z_mode
)
72 : code_for_aarch64_sme (m_unspec
, z_mode
, z_mode
));
73 return e
.use_exact_insn (icode
);
79 using load_za_base
= add_call_properties
<load_store_za_zt0_base
,
80 CP_READ_MEMORY
| CP_READ_ZA
83 using store_za_base
= add_call_properties
<load_store_za_zt0_base
,
84 CP_WRITE_MEMORY
| CP_READ_ZA
>;
86 /* E is a load or store intrinsic that accesses a ZA slice of mode MEM_MODE.
87 The intrinsic has a vnum parameter at index ARGNO. Return true if the
88 vnum argument is a constant that is a valid ZA offset for the underlying
92 has_in_range_vnum_arg (function_expander
&e
, machine_mode mem_mode
,
95 return (e
.mode_suffix_id
== MODE_vnum
96 && CONST_INT_P (e
.args
[argno
])
97 && UINTVAL (e
.args
[argno
]) < 16 / GET_MODE_UNIT_SIZE (mem_mode
));
100 /* E is a ZA load or store intrinsic that uses instruction ICODE. Add a
101 32-bit operand that gives the total ZA slice. (The instruction hard-codes
102 the constant offset to 0, so there is no operand for that.)
104 Argument ARGNO is the intrinsic's slice argument. If the intrinsic is
105 a _vnum intrinsic, argument VNUM_ARGNO is the intrinsic's vnum operand,
106 which must be added to the slice argument. */
109 add_load_store_slice_operand (function_expander
&e
, insn_code icode
,
110 unsigned int argno
, unsigned int vnum_argno
)
112 rtx base
= e
.args
[argno
];
113 if (e
.mode_suffix_id
== MODE_vnum
)
115 rtx vnum
= force_lowpart_subreg (SImode
, e
.args
[vnum_argno
], DImode
);
116 base
= simplify_gen_binary (PLUS
, SImode
, base
, vnum
);
118 e
.add_input_operand (icode
, base
);
121 /* Add a memory operand for ZA LD1 or ST1 intrinsic E. BASE_ARGNO is
122 the index of the base argument. */
125 add_load_store_operand (function_expander
&e
, unsigned int base_argno
)
127 auto mode
= e
.vector_mode (0);
128 rtx base
= e
.get_contiguous_base (mode
, base_argno
, base_argno
+ 1,
130 auto mem
= gen_rtx_MEM (mode
, force_reg (Pmode
, base
));
131 set_mem_align (mem
, BITS_PER_UNIT
);
132 e
.add_fixed_operand (mem
);
135 /* Expand ZA LDR or STR intrinsic E. There are two underlying instructions:
137 - BASE_CODE has a zero ZA slice offset
138 - VNUM_CODE has a constant operand for the ZA slice offset. */
141 expand_ldr_str_za (function_expander
&e
, insn_code base_code
,
144 if (has_in_range_vnum_arg (e
, VNx16QImode
, 2))
146 rtx mem_offset
= aarch64_sme_vq_immediate (Pmode
,
147 UINTVAL (e
.args
[2]) * 16,
149 e
.add_input_operand (vnum_code
, e
.args
[0]);
150 e
.add_input_operand (vnum_code
, e
.args
[2]);
151 e
.add_input_operand (vnum_code
, e
.args
[1]);
152 e
.add_input_operand (vnum_code
, mem_offset
);
153 return e
.generate_insn (vnum_code
);
157 rtx base
= e
.get_contiguous_base (VNx16QImode
, 1, 2, AARCH64_FL_SM_ON
);
158 add_load_store_slice_operand (e
, base_code
, 0, 2);
159 e
.add_input_operand (base_code
, base
);
160 return e
.generate_insn (base_code
);
164 /* Use instruction ICODE to expand ZT0 load or store E. */
167 expand_ldr_str_zt0 (function_expander
&e
, insn_code icode
)
169 rtx base
= e
.convert_to_pmode (e
.args
[1]);
170 rtx mem
= gen_rtx_MEM (V8DImode
, force_reg (Pmode
, base
));
171 e
.add_fixed_operand (mem
);
172 return e
.generate_insn (icode
);
175 /* Expand ZA LD1 or ST1 intrinsic E. UNSPEC is the load or store unspec.
176 IS_LOAD is true if E is a load, false if it is a store. */
179 expand_ld1_st1 (function_expander
&e
, int unspec
, bool is_load
)
181 bool is_vnum
= has_in_range_vnum_arg (e
, e
.vector_mode (0), 4);
182 auto icode
= (is_vnum
183 ? code_for_aarch64_sme_plus (unspec
, e
.vector_mode (0))
184 : code_for_aarch64_sme (unspec
, e
.vector_mode (0)));
186 add_load_store_operand (e
, 3);
187 e
.add_input_operand (icode
, e
.args
[0]);
190 e
.add_input_operand (icode
, e
.args
[1]);
191 e
.add_input_operand (icode
, e
.args
[4]);
194 add_load_store_slice_operand (e
, icode
, 1, 4);
195 e
.add_input_operand (icode
, e
.args
[2]);
197 add_load_store_operand (e
, 3);
198 return e
.generate_insn (icode
);
201 class arm_has_sme_impl
: public function_base
204 fold (gimple_folder
&f
) const override
207 return f
.fold_to_cstu (1);
212 expand (function_expander
&e
) const override
216 emit_insn (gen_aarch64_get_sme_state ());
217 return expand_simple_binop (DImode
, LSHIFTRT
,
218 gen_rtx_REG (DImode
, R0_REGNUM
),
219 gen_int_mode (63, QImode
),
220 e
.possible_target
, true, OPTAB_LIB_WIDEN
);
224 class arm_in_streaming_mode_impl
: public function_base
227 fold (gimple_folder
&f
) const override
229 if (TARGET_STREAMING
)
230 return f
.fold_to_cstu (1);
231 if (TARGET_NON_STREAMING
)
232 return f
.fold_to_cstu (0);
237 expand (function_expander
&e
) const override
239 if (TARGET_STREAMING
)
242 if (TARGET_NON_STREAMING
)
248 reg
= gen_reg_rtx (DImode
);
249 emit_insn (gen_aarch64_read_svcr (reg
));
253 emit_insn (gen_aarch64_get_sme_state ());
254 reg
= gen_rtx_REG (DImode
, R0_REGNUM
);
256 return expand_simple_binop (DImode
, AND
, reg
, gen_int_mode (1, DImode
),
257 e
.possible_target
, true, OPTAB_LIB_WIDEN
);
261 /* Implements svcnts[bhwd]. */
262 class svcnts_bhwd_impl
: public function_base
265 constexpr svcnts_bhwd_impl (machine_mode ref_mode
) : m_ref_mode (ref_mode
) {}
270 return exact_log2 (GET_MODE_UNIT_SIZE (m_ref_mode
));
274 fold (gimple_folder
&f
) const override
276 if (TARGET_STREAMING
)
277 return f
.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode
));
282 expand (function_expander
&e
) const override
284 rtx cntsb
= aarch64_sme_vq_immediate (DImode
, 16, AARCH64_ISA_MODE
);
285 auto shift
= get_shift ();
289 return expand_simple_binop (DImode
, LSHIFTRT
, cntsb
,
290 gen_int_mode (shift
, QImode
),
291 e
.possible_target
, true, OPTAB_LIB_WIDEN
);
294 /* The mode of the vector associated with the [bhwd] suffix. */
295 machine_mode m_ref_mode
;
298 class svld1_za_impl
: public load_za_base
301 constexpr svld1_za_impl (int unspec
) : m_unspec (unspec
) {}
304 expand (function_expander
&e
) const override
306 return expand_ld1_st1 (e
, m_unspec
, true);
312 class svldr_za_impl
: public load_za_base
316 expand (function_expander
&e
) const override
318 return expand_ldr_str_za (e
, CODE_FOR_aarch64_sme_ldr0
,
319 code_for_aarch64_sme_ldrn (Pmode
));
323 class svldr_zt_impl
: public load_store_za_zt0_base
327 call_properties (const function_instance
&) const override
329 return CP_READ_MEMORY
| CP_WRITE_ZT0
;
333 expand (function_expander
&e
) const override
335 return expand_ldr_str_zt0 (e
, CODE_FOR_aarch64_sme_ldr_zt0
);
339 class svluti_lane_zt_impl
: public read_zt0
<function_base
>
342 CONSTEXPR
svluti_lane_zt_impl (unsigned int bits
) : m_bits (bits
) {}
345 expand (function_expander
&e
) const override
347 auto mode
= e
.tuple_mode (0);
348 e
.args
.ordered_remove (0);
349 return e
.use_exact_insn (code_for_aarch64_sme_lut (m_bits
, mode
));
355 template<insn_code (*CODE
) (machine_mode
)>
356 class svread_za_slice_base
: public function_base
360 expand (function_expander
&e
) const override
362 machine_mode mode
= e
.vectors_per_tuple () == 4 ? VNx8DImode
: VNx4DImode
;
363 rtx res
= e
.use_exact_insn (CODE (mode
));
364 return aarch64_sve_reinterpret (e
.result_mode (), res
);
368 using svread_za_impl
= add_call_properties
369 <svread_za_slice_base
<code_for_aarch64_sme_read
>, CP_READ_ZA
>;
371 using svread_za_tile_impl
= add_call_properties
<read_write_za_base
,
374 using svreadz_za_impl
= add_call_properties
375 <svread_za_slice_base
<code_for_aarch64_sme_readz
>, CP_READ_ZA
| CP_WRITE_ZA
>;
377 using svreadz_za_tile_impl
= add_call_properties
<read_write_za_base
,
378 CP_READ_ZA
| CP_WRITE_ZA
>;
380 class svst1_za_impl
: public store_za_base
383 constexpr svst1_za_impl (int unspec
) : m_unspec (unspec
) {}
386 expand (function_expander
&e
) const override
388 return expand_ld1_st1 (e
, m_unspec
, false);
394 class svstr_za_impl
: public store_za_base
398 expand (function_expander
&e
) const override
400 return expand_ldr_str_za (e
, CODE_FOR_aarch64_sme_str0
,
401 code_for_aarch64_sme_strn (Pmode
));
405 class svstr_zt_impl
: public load_store_za_zt0_base
409 call_properties (const function_instance
&) const override
411 return CP_WRITE_MEMORY
| CP_READ_ZT0
;
415 expand (function_expander
&e
) const override
417 return expand_ldr_str_zt0 (e
, CODE_FOR_aarch64_sme_str_zt0
);
421 class svsudot_za_impl
: public read_write_za
<function_base
>
425 expand (function_expander
&e
) const override
427 if (e
.mode_suffix_id
== MODE_single
)
429 auto icode
= code_for_aarch64_sme_single_sudot (e
.vector_mode (0),
431 return e
.use_exact_insn (icode
);
433 std::swap (e
.args
[1], e
.args
[2]);
434 return e
.use_exact_insn (code_for_aarch64_sme (UNSPEC_SME_USDOT
,
440 class svundef_za_impl
: public write_za
<function_base
>
444 expand (function_expander
&) const override
446 rtx target
= gen_rtx_REG (VNx16QImode
, ZA_REGNUM
);
447 emit_clobber (copy_rtx (target
));
452 class svwrite_za_impl
: public function_base
456 call_properties (const function_instance
&) const override
462 expand (function_expander
&e
) const override
464 machine_mode mode
= e
.vectors_per_tuple () == 4 ? VNx8DImode
: VNx4DImode
;
465 e
.args
[1] = aarch64_sve_reinterpret (mode
, e
.args
[1]);
466 return e
.use_exact_insn (code_for_aarch64_sme_write (mode
));
470 using svwrite_za_tile_impl
= add_call_properties
<read_write_za_base
,
471 CP_READ_ZA
| CP_WRITE_ZA
>;
473 class svzero_mask_za_impl
: public write_za
<function_base
>
477 expand (function_expander
&e
) const override
479 return e
.use_exact_insn (CODE_FOR_aarch64_sme_zero_za
);
483 /* Return the mode iterator value that is used to represent a zeroing
484 of the ZA vectors described by GROUP. */
/* NOTE(review): the body of this function (the mapping from group
   suffixes to mode iterator values) is not visible in this extract, so
   its behavior cannot be documented or verified from here — confirm
   against the complete original source.  */
486 zero_slices_mode (group_suffix_index group
)
514 class svzero_za_impl
: public write_za
<function_base
>
518 expand (function_expander
&e
) const override
520 if (e
.args
.length () == 1)
522 auto mode
= zero_slices_mode (e
.group_suffix_id
);
523 return e
.use_exact_insn (code_for_aarch64_sme_zero_za_slices (mode
));
525 emit_insn (gen_aarch64_sme_zero_za (gen_int_mode (0xff, SImode
)));
530 class svzero_zt_impl
: public write_zt0
<function_base
>
534 expand (function_expander
&) const override
536 emit_insn (gen_aarch64_sme_zero_zt0 ());
541 } /* end anonymous namespace */
543 namespace aarch64_sve
{
545 FUNCTION (arm_has_sme
, arm_has_sme_impl
, )
546 FUNCTION (arm_in_streaming_mode
, arm_in_streaming_mode_impl
, )
547 FUNCTION (svadd_za
, sme_1mode_function
, (UNSPEC_SME_ADD
, UNSPEC_SME_ADD
,
549 FUNCTION (svadd_write_za
, sme_1mode_function
, (UNSPEC_SME_ADD_WRITE
,
550 UNSPEC_SME_ADD_WRITE
, -1))
551 FUNCTION (svaddha_za
, sme_1mode_function
, (UNSPEC_SME_ADDHA
,
552 UNSPEC_SME_ADDHA
, -1))
553 FUNCTION (svaddva_za
, sme_1mode_function
, (UNSPEC_SME_ADDVA
,
554 UNSPEC_SME_ADDVA
, -1))
555 FUNCTION (svbmopa_za
, sme_2mode_function
, (-1, UNSPEC_SME_BMOPA
, -1))
556 FUNCTION (svbmops_za
, sme_2mode_function
, (-1, UNSPEC_SME_BMOPS
, -1))
557 FUNCTION (svcntsb
, svcnts_bhwd_impl
, (VNx16QImode
))
558 FUNCTION (svcntsd
, svcnts_bhwd_impl
, (VNx2DImode
))
559 FUNCTION (svcntsh
, svcnts_bhwd_impl
, (VNx8HImode
))
560 FUNCTION (svcntsw
, svcnts_bhwd_impl
, (VNx4SImode
))
561 FUNCTION (svdot_za
, sme_2mode_function
, (UNSPEC_SME_SDOT
, UNSPEC_SME_UDOT
,
563 FUNCTION (svdot_lane_za
, sme_2mode_lane_function
, (UNSPEC_SME_SDOT
,
566 FUNCTION (svld1_hor_za
, svld1_za_impl
, (UNSPEC_SME_LD1_HOR
))
567 FUNCTION (svld1_ver_za
, svld1_za_impl
, (UNSPEC_SME_LD1_VER
))
568 FUNCTION (svldr_za
, svldr_za_impl
, )
569 FUNCTION (svldr_zt
, svldr_zt_impl
, )
570 FUNCTION (svluti2_lane_zt
, svluti_lane_zt_impl
, (2))
571 FUNCTION (svluti4_lane_zt
, svluti_lane_zt_impl
, (4))
572 FUNCTION (svmla_za
, sme_2mode_function
, (UNSPEC_SME_SMLA
, UNSPEC_SME_UMLA
,
574 FUNCTION (svmla_lane_za
, sme_2mode_lane_function
, (UNSPEC_SME_SMLA
,
577 FUNCTION (svmls_za
, sme_2mode_function
, (UNSPEC_SME_SMLS
, UNSPEC_SME_UMLS
,
579 FUNCTION (svmls_lane_za
, sme_2mode_lane_function
, (UNSPEC_SME_SMLS
,
582 FUNCTION (svmopa_za
, sme_2mode_function
, (UNSPEC_SME_SMOPA
, UNSPEC_SME_UMOPA
,
584 FUNCTION (svmops_za
, sme_2mode_function
, (UNSPEC_SME_SMOPS
, UNSPEC_SME_UMOPS
,
586 FUNCTION (svread_za
, svread_za_impl
,)
587 FUNCTION (svread_hor_za
, svread_za_tile_impl
, (UNSPEC_SME_READ_HOR
))
588 FUNCTION (svread_ver_za
, svread_za_tile_impl
, (UNSPEC_SME_READ_VER
))
589 FUNCTION (svreadz_za
, svreadz_za_impl
,)
590 FUNCTION (svreadz_hor_za
, svreadz_za_tile_impl
, (UNSPEC_SME_READZ_HOR
))
591 FUNCTION (svreadz_ver_za
, svreadz_za_tile_impl
, (UNSPEC_SME_READZ_VER
))
592 FUNCTION (svst1_hor_za
, svst1_za_impl
, (UNSPEC_SME_ST1_HOR
))
593 FUNCTION (svst1_ver_za
, svst1_za_impl
, (UNSPEC_SME_ST1_VER
))
594 FUNCTION (svstr_za
, svstr_za_impl
, )
595 FUNCTION (svstr_zt
, svstr_zt_impl
, )
596 FUNCTION (svsub_za
, sme_1mode_function
, (UNSPEC_SME_SUB
, UNSPEC_SME_SUB
,
598 FUNCTION (svsub_write_za
, sme_1mode_function
, (UNSPEC_SME_SUB_WRITE
,
599 UNSPEC_SME_SUB_WRITE
, -1))
600 FUNCTION (svsudot_za
, svsudot_za_impl
,)
601 FUNCTION (svsudot_lane_za
, sme_2mode_lane_function
, (UNSPEC_SME_SUDOT
, -1, -1))
602 FUNCTION (svsuvdot_lane_za
, sme_2mode_lane_function
, (UNSPEC_SME_SUVDOT
,
604 FUNCTION (svsumopa_za
, sme_2mode_function
, (UNSPEC_SME_SUMOPA
, -1, -1))
605 FUNCTION (svsumops_za
, sme_2mode_function
, (UNSPEC_SME_SUMOPS
, -1, -1))
606 FUNCTION (svundef_za
, svundef_za_impl
, )
607 FUNCTION (svusdot_za
, sme_2mode_function
, (-1, UNSPEC_SME_USDOT
, -1))
608 FUNCTION (svusdot_lane_za
, sme_2mode_lane_function
, (-1, UNSPEC_SME_USDOT
, -1))
609 FUNCTION (svusvdot_lane_za
, sme_2mode_lane_function
, (-1, UNSPEC_SME_USVDOT
,
611 FUNCTION (svusmopa_za
, sme_2mode_function
, (-1, UNSPEC_SME_USMOPA
, -1))
612 FUNCTION (svusmops_za
, sme_2mode_function
, (-1, UNSPEC_SME_USMOPS
, -1))
613 FUNCTION (svvdot_lane_za
, sme_2mode_lane_function
, (UNSPEC_SME_SVDOT
,
616 FUNCTION (svwrite_za
, svwrite_za_impl
,)
617 FUNCTION (svwrite_hor_za
, svwrite_za_tile_impl
, (UNSPEC_SME_WRITE_HOR
))
618 FUNCTION (svwrite_ver_za
, svwrite_za_tile_impl
, (UNSPEC_SME_WRITE_VER
))
619 FUNCTION (svzero_mask_za
, svzero_mask_za_impl
, )
620 FUNCTION (svzero_za
, svzero_za_impl
, )
621 FUNCTION (svzero_zt
, svzero_zt_impl
, )
623 } /* end namespace aarch64_sve */