/* ACLE support for AArch64 SVE (__ARM_FEATURE_SVE intrinsics)
   Copyright (C) 2018-2025 Free Software Foundation, Inc.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "insn-codes.h"
#include "basic-block.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-vector-builder.h"
#include "rtx-vector-builder.h"
#include "vec-perm-indices.h"
#include "aarch64-sve-builtins.h"
#include "aarch64-sve-builtins-shapes.h"
#include "aarch64-sve-builtins-base.h"
#include "aarch64-sve-builtins-functions.h"
#include "aarch64-builtins.h"
#include "gimple-fold.h"
using namespace aarch64_sve;
/* Return true if VAL is an undefined value.  */
static bool
is_undef (tree val)
{
  if (TREE_CODE (val) == SSA_NAME)
    {
      if (ssa_undefined_value_p (val, false))
	return true;

      gimple *def = SSA_NAME_DEF_STMT (val);
      if (gcall *call = dyn_cast<gcall *> (def))
	if (tree fndecl = gimple_call_fndecl (call))
	  if (const function_instance *instance = lookup_fndecl (fndecl))
	    if (instance->base == functions::svundef)
	      return true;
    }
  return false;
}
/* Return the UNSPEC_CMLA* unspec for rotation amount ROT.  */
static int
unspec_cmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_CMLA;
    case 90: return UNSPEC_CMLA90;
    case 180: return UNSPEC_CMLA180;
    case 270: return UNSPEC_CMLA270;
    default: gcc_unreachable ();
    }
}
/* Return the UNSPEC_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_FCMLA;
    case 90: return UNSPEC_FCMLA90;
    case 180: return UNSPEC_FCMLA180;
    case 270: return UNSPEC_FCMLA270;
    default: gcc_unreachable ();
    }
}
/* Return the UNSPEC_COND_FCMLA* unspec for rotation amount ROT.  */
static int
unspec_cond_fcmla (int rot)
{
  switch (rot)
    {
    case 0: return UNSPEC_COND_FCMLA;
    case 90: return UNSPEC_COND_FCMLA90;
    case 180: return UNSPEC_COND_FCMLA180;
    case 270: return UNSPEC_COND_FCMLA270;
    default: gcc_unreachable ();
    }
}
/* Expand a call to svmad, or svmla after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_mad (function_expander &e,
	    unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
	icode = code_for_aarch64_pred_fma (e.vector_mode (0));
      else
	icode = code_for_aarch64_pred (UNSPEC_COND_FMLA, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fma_optab);
  return e.use_cond_insn (icode, merge_argno);
}
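
/* Informal illustration of the reordering above: svmad_m (pg, a, b, c)
   computes a * b + c and merges with A, whereas svmla_m (pg, a, b, c)
   computes a + b * c and also merges with A.  svmla is therefore handled
   by rotating its inputs into svmad order and asking expand_mad to merge
   with argument 3 (see svmla_impl below).  */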
/* Expand a call to svmla_lane or svmls_lane using floating-point unspec
   UNSPEC.  */
static rtx
expand_mla_mls_lane (function_expander &e, int unspec)
{
  /* Put the operands in the normal (fma ...) order, with the accumulator
     last.  This fits naturally since that's also the unprinted operand
     in the asm output.  */
  e.rotate_inputs_left (0, 4);
  insn_code icode = code_for_aarch64_lane (unspec, e.vector_mode (0));
  return e.use_exact_insn (icode);
}
/* Expand a call to svmsb, or svmls after reordering its operands.
   Make _m forms merge with argument MERGE_ARGNO.  */
static rtx
expand_msb (function_expander &e,
	    unsigned int merge_argno = DEFAULT_MERGE_ARGNO)
{
  if (e.pred == PRED_x)
    {
      insn_code icode;
      if (e.type_suffix (0).integer_p)
	icode = code_for_aarch64_pred_fnma (e.vector_mode (0));
      else
	icode = code_for_aarch64_pred (UNSPEC_COND_FMLS, e.vector_mode (0));
      return e.use_pred_x_insn (icode);
    }

  insn_code icode = e.direct_optab_handler (cond_fnma_optab);
  return e.use_cond_insn (icode, merge_argno);
}
class svabd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* The integer operations are represented as the subtraction of the
       minimum from the maximum, with the signedness of the instruction
       keyed off the signedness of the maximum operation.  */
    rtx_code max_code = e.type_suffix (0).unsigned_p ? UMAX : SMAX;
    insn_code icode;
    if (e.pred == PRED_x)
      {
	if (e.type_suffix (0).integer_p)
	  icode = code_for_aarch64_pred_abd (max_code, e.vector_mode (0));
	else
	  icode = code_for_aarch64_pred_abd (e.vector_mode (0));
	return e.use_pred_x_insn (icode);
      }

    if (e.type_suffix (0).integer_p)
      icode = code_for_aarch64_cond_abd (max_code, e.vector_mode (0));
    else
      icode = code_for_aarch64_cond_abd (e.vector_mode (0));
    return e.use_cond_insn (icode);
  }
};
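
/* Informal illustration: for integer elements, svabd_x (pg, a, b) is
   expanded as max (a, b) - min (a, b) in the appropriate signedness,
   e.g. lanes holding 3 and 10 produce 10 - 3 = 7.  Floating-point
   elements use the separate FABD-style patterns selected above.  */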
/* Implements svacge, svacgt, svacle and svaclt.  */
class svac_impl : public function_base
{
public:
  CONSTEXPR svac_impl (int unspec) : m_unspec (unspec) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    tree pg = gimple_call_arg (f.call, 0);
    if (is_pfalse (pg))
      return f.fold_call_to (pg);
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.add_ptrue_hint (0, e.gp_mode (0));
    insn_code icode = code_for_aarch64_pred_fac (m_unspec, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The unspec code for the underlying comparison.  */
  int m_unspec;
};
class svadda_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (gimple_call_arg (f.call, 1));
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* Put the predicate last, as required by mask_fold_left_plus_optab.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = direct_optab_handler (mask_fold_left_plus_optab, mode);
    return e.use_exact_insn (icode);
  }
};
class svaddv_impl : public reduction
{
public:
  CONSTEXPR svaddv_impl ()
    : reduction (UNSPEC_SADDV, UNSPEC_UADDV, UNSPEC_FADDV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
    return NULL;
  }
};
/* Implements svadr[bhwd].  */
class svadr_bhwd_impl : public function_base
{
public:
  CONSTEXPR svadr_bhwd_impl (unsigned int shift) : m_shift (shift) {}

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = GET_MODE (e.args[0]);
    if (m_shift == 0)
      return e.use_exact_insn (code_for_aarch64_adr (mode));

    /* Turn the access size into an extra shift argument.  */
    rtx shift = gen_int_mode (m_shift, GET_MODE_INNER (mode));
    e.args.quick_push (expand_vector_broadcast (mode, shift));
    return e.use_exact_insn (code_for_aarch64_adr_shift (mode));
  }

  /* How many bits left to shift the vector displacement.  */
  unsigned int m_shift;
};
class svandv_impl : public reduction
{
public:
  CONSTEXPR svandv_impl () : reduction (UNSPEC_ANDV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (build_all_ones_cst (TREE_TYPE (f.lhs)));
    return NULL;
  }
};
class svbic_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Convert svbic of a constant into svand of its inverse.  */
    if (CONST_INT_P (e.args[2]))
      {
	machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
	e.args[2] = simplify_unary_operation (NOT, mode, e.args[2], mode);
	return e.map_to_rtx_codes (AND, AND, -1, -1);
      }

    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
	gcc_assert (e.pred == PRED_z);
	return e.use_exact_insn (CODE_FOR_aarch64_pred_bicvnx16bi_z);
      }

    if (e.pred == PRED_x)
      return e.use_unpred_insn (e.direct_optab_handler (andn_optab));

    return e.use_cond_insn (code_for_cond_bic (e.vector_mode (0)));
  }
};
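
/* Informal illustration: since BIC (a, b) == a & ~b, a call such as
   svbic_x (pg, x, 0xf0) on 8-bit elements is rewritten above into
   svand_x (pg, x, 0x0f), i.e. the constant is inverted and the operation
   becomes a plain AND.  */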
/* Implements svbrkn, svbrkpa and svbrkpb.  */
class svbrk_binary_impl : public function_base
{
public:
  CONSTEXPR svbrk_binary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};
/* Implements svbrka and svbrkb.  */
class svbrk_unary_impl : public function_base
{
public:
  CONSTEXPR svbrk_unary_impl (int unspec) : m_unspec (unspec) {}

  rtx
  expand (function_expander &e) const override
  {
    return e.use_cond_insn (code_for_aarch64_brk (m_unspec));
  }

  /* The unspec code associated with the operation.  */
  int m_unspec;
};
class svcadd_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Convert the rotation amount into a specific unspec.  */
    int rot = INTVAL (e.args.pop ());
    if (rot == 90)
      return e.map_to_unspecs (UNSPEC_CADD90, UNSPEC_CADD90,
			       UNSPEC_COND_FCADD90);
    if (rot == 270)
      return e.map_to_unspecs (UNSPEC_CADD270, UNSPEC_CADD270,
			       UNSPEC_COND_FCADD270);
    gcc_unreachable ();
  }
};
376 /* Implements svclasta and svclastb. */
377 class svclast_impl
: public quiet
<function_base
>
380 CONSTEXPR
svclast_impl (int unspec
) : m_unspec (unspec
) {}
383 fold (gimple_folder
&f
) const override
385 if (is_pfalse (gimple_call_arg (f
.call
, 0)))
386 return f
.fold_call_to (gimple_call_arg (f
.call
, 1));
391 expand (function_expander
&e
) const override
393 /* Match the fold_extract_optab order. */
394 std::swap (e
.args
[0], e
.args
[1]);
395 machine_mode mode
= e
.vector_mode (0);
397 if (e
.mode_suffix_id
== MODE_n
)
398 icode
= code_for_fold_extract (m_unspec
, mode
);
400 icode
= code_for_aarch64_fold_extract_vector (m_unspec
, mode
);
401 return e
.use_exact_insn (icode
);
404 /* The unspec code associated with the operation. */
408 class svcmla_impl
: public function_base
412 expand (function_expander
&e
) const override
414 /* Convert the rotation amount into a specific unspec. */
415 int rot
= INTVAL (e
.args
.pop ());
416 if (e
.type_suffix (0).float_p
)
418 /* Make the operand order the same as the one used by the fma optabs,
419 with the accumulator last. */
420 e
.rotate_inputs_left (1, 4);
421 return e
.map_to_unspecs (-1, -1, unspec_cond_fcmla (rot
), 3);
425 int cmla
= unspec_cmla (rot
);
426 return e
.map_to_unspecs (cmla
, cmla
, -1);
431 class svcmla_lane_impl
: public function_base
435 expand (function_expander
&e
) const override
437 /* Convert the rotation amount into a specific unspec. */
438 int rot
= INTVAL (e
.args
.pop ());
439 machine_mode mode
= e
.vector_mode (0);
440 if (e
.type_suffix (0).float_p
)
442 /* Make the operand order the same as the one used by the fma optabs,
443 with the accumulator last. */
444 e
.rotate_inputs_left (0, 4);
445 insn_code icode
= code_for_aarch64_lane (unspec_fcmla (rot
), mode
);
446 return e
.use_exact_insn (icode
);
450 insn_code icode
= code_for_aarch64_lane (unspec_cmla (rot
), mode
);
451 return e
.use_exact_insn (icode
);
456 /* Implements svcmp<cc> (except svcmpuo, which is handled separately). */
457 class svcmp_impl
: public function_base
460 CONSTEXPR
svcmp_impl (tree_code code
, int unspec_for_fp
)
461 : m_code (code
), m_unspec_for_fp (unspec_for_fp
) {}
464 fold (gimple_folder
&f
) const override
466 tree pg
= gimple_call_arg (f
.call
, 0);
467 tree rhs1
= gimple_call_arg (f
.call
, 1);
468 tree rhs2
= gimple_call_arg (f
.call
, 2);
470 /* Convert a ptrue-predicated integer comparison into the corresponding
471 gimple-level operation. */
472 if (integer_all_onesp (pg
)
473 && f
.type_suffix (0).element_bytes
== 1
474 && f
.type_suffix (0).integer_p
)
476 gimple_seq stmts
= NULL
;
477 rhs2
= f
.force_vector (stmts
, TREE_TYPE (rhs1
), rhs2
);
478 gsi_insert_seq_before (f
.gsi
, stmts
, GSI_SAME_STMT
);
479 return gimple_build_assign (f
.lhs
, m_code
, rhs1
, rhs2
);
483 return f
.fold_call_to (pg
);
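
  /* Informal illustration: with an all-true predicate, a byte comparison
     such as svcmplt_s8 (svptrue_b8 (), a, b) is folded above to the gimple
     assignment "lhs = a < b", exposing it to generic optimizations; with
     an all-false predicate the call folds to the all-false predicate
     itself.  */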
488 expand (function_expander
&e
) const override
490 machine_mode mode
= e
.vector_mode (0);
492 /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
494 e
.add_ptrue_hint (0, e
.gp_mode (0));
496 if (e
.type_suffix (0).integer_p
)
498 bool unsigned_p
= e
.type_suffix (0).unsigned_p
;
499 rtx_code code
= get_rtx_code (m_code
, unsigned_p
);
500 return e
.use_exact_insn (code_for_aarch64_pred_cmp (code
, mode
));
503 insn_code icode
= code_for_aarch64_pred_fcm (m_unspec_for_fp
, mode
);
504 return e
.use_exact_insn (icode
);
507 /* The tree code associated with the comparison. */
510 /* The unspec code to use for floating-point comparisons. */
514 /* Implements svcmp<cc>_wide. */
515 class svcmp_wide_impl
: public function_base
518 CONSTEXPR
svcmp_wide_impl (tree_code code
, int unspec_for_sint
,
520 : m_code (code
), m_unspec_for_sint (unspec_for_sint
),
521 m_unspec_for_uint (unspec_for_uint
) {}
524 fold (gimple_folder
&f
) const override
526 tree pg
= gimple_call_arg (f
.call
, 0);
528 return f
.fold_call_to (pg
);
533 expand (function_expander
&e
) const override
535 machine_mode mode
= e
.vector_mode (0);
536 bool unsigned_p
= e
.type_suffix (0).unsigned_p
;
537 rtx_code code
= get_rtx_code (m_code
, unsigned_p
);
539 /* Comparisons are UNSPEC_PRED_Z operations and so need a hint
541 e
.add_ptrue_hint (0, e
.gp_mode (0));
543 /* If the argument is a constant that the unwidened comparisons
544 can handle directly, use them instead. */
545 insn_code icode
= code_for_aarch64_pred_cmp (code
, mode
);
546 rtx op2
= unwrap_const_vec_duplicate (e
.args
[3]);
548 && insn_data
[icode
].operand
[4].predicate (op2
, DImode
))
551 return e
.use_exact_insn (icode
);
554 int unspec
= (unsigned_p
? m_unspec_for_uint
: m_unspec_for_sint
);
555 return e
.use_exact_insn (code_for_aarch64_pred_cmp_wide (unspec
, mode
));
558 /* The tree code associated with the comparison. */
561 /* The unspec codes for signed and unsigned wide comparisons
563 int m_unspec_for_sint
;
564 int m_unspec_for_uint
;
567 class svcmpuo_impl
: public quiet
<function_base
>
572 fold (gimple_folder
&f
) const override
574 tree pg
= gimple_call_arg (f
.call
, 0);
576 return f
.fold_call_to (pg
);
581 expand (function_expander
&e
) const override
583 e
.add_ptrue_hint (0, e
.gp_mode (0));
584 return e
.use_exact_insn (code_for_aarch64_pred_fcmuo (e
.vector_mode (0)));
588 class svcnot_impl
: public function_base
592 expand (function_expander
&e
) const override
594 machine_mode mode
= e
.vector_mode (0);
595 machine_mode pred_mode
= e
.gp_mode (0);
596 /* The underlying _x pattern is effectively:
598 dst = src == 0 ? 1 : 0
600 rather than an UNSPEC_PRED_X. Using this form allows autovec
601 constructs to be matched by combine, but it means that the
602 predicate on the src == 0 comparison must be all-true.
604 For simplicity, represent other _x operations as fully-defined _m
605 operations rather than using a separate bespoke pattern. */
607 && gen_lowpart (pred_mode
, e
.args
[0]) == CONSTM1_RTX (pred_mode
))
608 return e
.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode
));
609 return e
.use_cond_insn (code_for_cond_cnot (mode
),
610 e
.pred
== PRED_x
? 1 : 0);
614 /* Implements svcnt[bhwd], which count the number of elements
615 in a particular vector mode. */
616 class svcnt_bhwd_impl
: public function_base
619 CONSTEXPR
svcnt_bhwd_impl (machine_mode ref_mode
) : m_ref_mode (ref_mode
) {}
622 fold (gimple_folder
&f
) const override
624 return f
.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode
));
628 expand (function_expander
&) const override
630 return gen_int_mode (GET_MODE_NUNITS (m_ref_mode
), DImode
);
633 /* The mode of the vector associated with the [bhwd] suffix. */
634 machine_mode m_ref_mode
;
637 /* Implements svcnt[bhwd]_pat. */
638 class svcnt_bhwd_pat_impl
: public svcnt_bhwd_impl
641 using svcnt_bhwd_impl::svcnt_bhwd_impl
;
644 fold (gimple_folder
&f
) const override
646 tree pattern_arg
= gimple_call_arg (f
.call
, 0);
647 aarch64_svpattern pattern
= (aarch64_svpattern
) tree_to_shwi (pattern_arg
);
649 if (pattern
== AARCH64_SV_ALL
)
      /* svcnt[bhwd]_pat (SV_ALL) == svcnt[bhwd] ().  */
651 return svcnt_bhwd_impl::fold (f
);
653 /* See whether we can count the number of elements in the pattern
655 unsigned int elements_per_vq
= 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode
);
656 HOST_WIDE_INT value
= aarch64_fold_sve_cnt_pat (pattern
, elements_per_vq
);
658 return f
.fold_to_cstu (value
);
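
  /* Informal illustration: patterns that fit in the 128-bit minimum vector
     length fold to constants even when the runtime VL is unknown, e.g.
     svcntw_pat (SV_VL4) folds to 4 because a 128-bit vector already holds
     four 32-bit elements.  Patterns that need more elements than that
     (e.g. SV_VL8 for 32-bit elements) are left to the expansion below,
     which emits a CNT instruction with the requested pattern.  */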
664 expand (function_expander
&e
) const override
666 unsigned int elements_per_vq
= 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode
);
667 e
.args
.quick_push (gen_int_mode (elements_per_vq
, DImode
));
668 e
.args
.quick_push (const1_rtx
);
669 return e
.use_exact_insn (CODE_FOR_aarch64_sve_cnt_pat
);
673 class svcntp_impl
: public function_base
678 fold (gimple_folder
&f
) const override
680 tree pg
= gimple_call_arg (f
.call
, 0);
682 return f
.fold_call_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
687 expand (function_expander
&e
) const override
689 if (e
.type_suffix (0).tclass
== TYPE_count
)
691 unsigned int bits
= e
.type_suffix (0).element_bits
;
692 return e
.use_exact_insn (code_for_aarch64_sve_cntp_c (bits
));
695 machine_mode mode
= e
.vector_mode (0);
696 e
.add_ptrue_hint (0, mode
);
697 return e
.use_exact_insn (code_for_aarch64_pred_cntp (mode
));
702 : public QUIET_CODE_FOR_MODE0 (aarch64_sve_compact
)
706 fold (gimple_folder
&f
) const override
708 if (is_pfalse (gimple_call_arg (f
.call
, 0)))
709 return f
.fold_call_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
714 /* Implements svcreate2, svcreate3 and svcreate4. */
715 class svcreate_impl
: public quiet
<multi_vector_function
>
718 using quiet
<multi_vector_function
>::quiet
;
721 fold (gimple_folder
&f
) const override
723 unsigned int nargs
= gimple_call_num_args (f
.call
);
724 tree lhs_type
= TREE_TYPE (f
.lhs
);
726 /* Replace the call with a clobber of the result (to prevent it from
727 becoming upwards exposed) followed by stores into each individual
730 The fold routines expect the replacement statement to have the
731 same lhs as the original call, so return the clobber statement
732 rather than the final vector store. */
733 gassign
*clobber
= gimple_build_assign (f
.lhs
, build_clobber (lhs_type
));
735 for (unsigned int i
= nargs
; i
-- > 0; )
737 tree rhs_vector
= gimple_call_arg (f
.call
, i
);
738 tree field
= tuple_type_field (TREE_TYPE (f
.lhs
));
739 tree lhs_array
= build3 (COMPONENT_REF
, TREE_TYPE (field
),
740 unshare_expr (f
.lhs
), field
, NULL_TREE
);
741 tree lhs_vector
= build4 (ARRAY_REF
, TREE_TYPE (rhs_vector
),
742 lhs_array
, size_int (i
),
743 NULL_TREE
, NULL_TREE
);
744 gassign
*assign
= gimple_build_assign (lhs_vector
, rhs_vector
);
745 gsi_insert_after (f
.gsi
, assign
, GSI_SAME_STMT
);
751 expand (function_expander
&e
) const override
753 rtx lhs_tuple
= e
.get_nonoverlapping_reg_target ();
755 /* Record that LHS_TUPLE is dead before the first store. */
756 emit_clobber (lhs_tuple
);
757 for (unsigned int i
= 0; i
< e
.args
.length (); ++i
)
759 /* Use an lvalue subreg to refer to vector I in LHS_TUPLE. */
760 rtx lhs_vector
= simplify_gen_subreg (GET_MODE (e
.args
[i
]),
761 lhs_tuple
, GET_MODE (lhs_tuple
),
762 i
* BYTES_PER_SVE_VECTOR
);
763 emit_move_insn (lhs_vector
, e
.args
[i
]);
769 class svcvt_impl
: public function_base
773 expand (function_expander
&e
) const override
776 if (e
.pred
== PRED_none
)
778 machine_mode mode0
= e
.result_mode ();
779 machine_mode mode1
= GET_MODE (e
.args
[0]);
781 if (e
.type_suffix (0).integer_p
)
782 optab
= e
.type_suffix (0).unsigned_p
? ufix_optab
: sfix_optab
;
783 else if (e
.type_suffix (1).integer_p
)
784 optab
= e
.type_suffix (1).unsigned_p
? ufloat_optab
: sfloat_optab
;
785 else if (e
.type_suffix (0).element_bits
786 < e
.type_suffix (1).element_bits
)
790 icode
= convert_optab_handler (optab
, mode0
, mode1
);
791 gcc_assert (icode
!= CODE_FOR_nothing
);
792 return e
.use_exact_insn (icode
);
794 machine_mode mode0
= e
.vector_mode (0);
795 machine_mode mode1
= e
.vector_mode (1);
796 /* All this complication comes from the need to select four things
799 (1) the kind of conversion (int<-float, float<-int, float<-float)
800 (2) signed vs. unsigned integers, where relevant
801 (3) the predication mode, which must be the wider of the predication
802 modes for MODE0 and MODE1
803 (4) the predication type (m, x or z)
805 The only supported int<->float conversions for which the integer is
806 narrower than the float are SI<->DF. It's therefore more convenient
807 to handle (3) by defining two patterns for int<->float conversions:
808 one in which the integer is at least as wide as the float and so
809 determines the predication mode, and another single SI<->DF pattern
810 in which the float's mode determines the predication mode (which is
811 always VNx2BI in that case).
813 The names of the patterns follow the optab convention of giving
814 the source mode before the destination mode. */
815 if (e
.type_suffix (1).integer_p
)
817 int unspec
= (e
.type_suffix (1).unsigned_p
819 : UNSPEC_COND_SCVTF
);
820 if (e
.type_suffix (0).element_bytes
<= e
.type_suffix (1).element_bytes
)
821 icode
= (e
.pred
== PRED_x
822 ? code_for_aarch64_sve_nonextend (unspec
, mode1
, mode0
)
823 : code_for_cond_nonextend (unspec
, mode1
, mode0
));
825 icode
= (e
.pred
== PRED_x
826 ? code_for_aarch64_sve_extend (unspec
, mode1
, mode0
)
827 : code_for_cond_extend (unspec
, mode1
, mode0
));
831 int unspec
= (!e
.type_suffix (0).integer_p
? UNSPEC_COND_FCVT
832 : e
.type_suffix (0).unsigned_p
? UNSPEC_COND_FCVTZU
833 : UNSPEC_COND_FCVTZS
);
834 if (e
.type_suffix (0).element_bytes
>= e
.type_suffix (1).element_bytes
)
835 icode
= (e
.pred
== PRED_x
836 ? code_for_aarch64_sve_nontrunc (unspec
, mode1
, mode0
)
837 : code_for_cond_nontrunc (unspec
, mode1
, mode0
));
839 icode
= (e
.pred
== PRED_x
840 ? code_for_aarch64_sve_trunc (unspec
, mode1
, mode0
)
841 : code_for_cond_trunc (unspec
, mode1
, mode0
));
844 if (e
.pred
== PRED_x
)
845 return e
.use_pred_x_insn (icode
);
846 return e
.use_cond_insn (icode
);
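
  /* Informal illustration of the selection above: svcvt_f64_s32_x is the
     SI<->DF case and uses the "extend" pattern (predicated by VNx2BI),
     svcvt_s32_f64_x uses the corresponding "trunc" pattern, and same-width
     conversions such as svcvt_f32_s32_x use the "nonextend"/"nontrunc"
     forms.  */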
850 class svcvtnt_impl
: public CODE_FOR_MODE0 (aarch64_sve_cvtnt
)
854 fold (gimple_folder
&f
) const override
856 if (f
.pred
== PRED_x
&& is_pfalse (gimple_call_arg (f
.call
, 1)))
857 f
.fold_call_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
862 class svdiv_impl
: public rtx_code_function
865 CONSTEXPR
svdiv_impl ()
866 : rtx_code_function (DIV
, UDIV
, UNSPEC_COND_FDIV
) {}
869 fold (gimple_folder
&f
) const override
871 if (auto *res
= f
.fold_const_binary (TRUNC_DIV_EXPR
))
874 /* If the divisor is all ones, fold to dividend. */
875 tree op1
= gimple_call_arg (f
.call
, 1);
876 tree op2
= gimple_call_arg (f
.call
, 2);
877 if (integer_onep (op2
))
878 return f
.fold_active_lanes_to (op1
);
880 /* If one of the operands is all zeros, fold to zero vector. */
881 if (integer_zerop (op1
) || integer_zerop (op2
))
882 return f
.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
884 /* If the divisor is all integer -1, fold to svneg. */
885 tree pg
= gimple_call_arg (f
.call
, 0);
886 if (!f
.type_suffix (0).unsigned_p
&& integer_minus_onep (op2
))
888 function_instance
instance ("svneg", functions::svneg
, shapes::unary
,
889 MODE_none
, f
.type_suffix_ids
, GROUP_none
,
891 gcall
*call
= f
.redirect_call (instance
);
892 unsigned offset_index
= 0;
893 if (f
.pred
== PRED_m
)
896 gimple_call_set_arg (call
, 0, op1
);
899 gimple_set_num_ops (call
, 5);
900 gimple_call_set_arg (call
, offset_index
, pg
);
901 gimple_call_set_arg (call
, offset_index
+ 1, op1
);
905 /* If the divisor is a uniform power of 2, fold to a shift
907 tree op2_cst
= uniform_integer_cst_p (op2
);
908 if (!op2_cst
|| !integer_pow2p (op2_cst
))
914 if (f
.type_suffix (0).unsigned_p
&& tree_to_uhwi (op2_cst
) != 1)
916 function_instance
instance ("svlsr", functions::svlsr
,
917 shapes::binary_uint_opt_n
, MODE_n
,
918 f
.type_suffix_ids
, GROUP_none
, f
.pred
,
920 call
= f
.redirect_call (instance
);
921 tree d
= INTEGRAL_TYPE_P (TREE_TYPE (op2
)) ? op2
: op2_cst
;
922 new_divisor
= wide_int_to_tree (TREE_TYPE (d
), tree_log2 (d
));
926 if (tree_int_cst_sign_bit (op2_cst
)
927 || tree_to_shwi (op2_cst
) == 1)
930 function_instance
instance ("svasrd", functions::svasrd
,
931 shapes::shift_right_imm
, MODE_n
,
932 f
.type_suffix_ids
, GROUP_none
, f
.pred
,
934 call
= f
.redirect_call (instance
);
935 new_divisor
= wide_int_to_tree (scalar_types
[VECTOR_TYPE_svuint64_t
],
936 tree_log2 (op2_cst
));
939 gimple_call_set_arg (call
, 2, new_divisor
);
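
    /* Informal illustration: for a uniform power-of-two divisor the
       division is strength-reduced above, e.g. svdiv_x (pg, x, 8) becomes
       svasrd_x (pg, x, 3) for signed elements (rounding towards zero) and
       svlsr_x (pg, x, 3) for unsigned elements, while a signed divisor of
       -1 is redirected to svneg.  */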
945 class svdot_impl
: public function_base
949 expand (function_expander
&e
) const override
952 if (e
.fpm_mode
== aarch64_sve::FPM_set
)
953 icode
= code_for_aarch64_sve_dot (e
.result_mode ());
956 /* In the optab, the multiplication operands come before the accumulator
957 operand. The optab is keyed off the multiplication mode. */
958 e
.rotate_inputs_left (0, 3);
959 if (e
.type_suffix_ids
[1] == NUM_TYPE_SUFFIXES
)
960 icode
= e
.convert_optab_handler_for_sign (sdot_prod_optab
,
963 GET_MODE (e
.args
[0]));
965 icode
= (e
.type_suffix (0).float_p
966 ? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf
967 : e
.type_suffix (0).unsigned_p
968 ? CODE_FOR_udot_prodvnx4sivnx8hi
969 : CODE_FOR_sdot_prodvnx4sivnx8hi
);
971 return e
.use_unpred_insn (icode
);
975 class svdotprod_lane_impl
: public unspec_based_function_base
978 using unspec_based_function_base::unspec_based_function_base
;
981 expand (function_expander
&e
) const override
984 machine_mode mode0
= GET_MODE (e
.args
[0]);
985 machine_mode mode1
= GET_MODE (e
.args
[1]);
986 if (e
.fpm_mode
== aarch64_sve::FPM_set
)
988 icode
= code_for_aarch64_sve_dot_lane (mode0
);
992 /* Use the same ordering as the dot_prod_optab, with the
994 e
.rotate_inputs_left (0, 4);
995 int unspec
= unspec_for (e
);
996 if (unspec
== UNSPEC_FDOT
)
997 icode
= CODE_FOR_aarch64_fdot_prod_lanevnx4sfvnx8hf
;
999 icode
= code_for_aarch64_dot_prod_lane (unspec
, mode0
, mode1
);
1001 return e
.use_exact_insn (icode
);
1005 class svdup_impl
: public quiet
<function_base
>
1009 fold (gimple_folder
&f
) const override
1011 tree vec_type
= TREE_TYPE (f
.lhs
);
1012 tree rhs
= gimple_call_arg (f
.call
, f
.pred
== PRED_none
? 0 : 1);
1014 if (f
.pred
== PRED_none
|| f
.pred
== PRED_x
)
1016 if (CONSTANT_CLASS_P (rhs
))
1018 if (f
.type_suffix (0).bool_p
)
1019 return (tree_to_shwi (rhs
)
1020 ? f
.fold_to_ptrue ()
1021 : f
.fold_to_pfalse ());
1023 tree rhs_vector
= build_vector_from_val (vec_type
, rhs
);
1024 return gimple_build_assign (f
.lhs
, rhs_vector
);
1027 /* Avoid folding _b to a VEC_DUPLICATE_EXPR, since to do that we
1028 would need to introduce an extra and unwanted conversion to
1029 the truth vector element type. */
1030 if (!f
.type_suffix (0).bool_p
)
1031 return gimple_build_assign (f
.lhs
, VEC_DUPLICATE_EXPR
, rhs
);
1034 /* svdup_z (pg, x) == VEC_COND_EXPR <pg, VEC_DUPLICATE_EXPR <x>, 0>. */
1035 if (f
.pred
== PRED_z
)
1037 gimple_seq stmts
= NULL
;
1038 tree pred
= f
.convert_pred (stmts
, vec_type
, 0);
1039 rhs
= f
.force_vector (stmts
, vec_type
, rhs
);
1040 gsi_insert_seq_before (f
.gsi
, stmts
, GSI_SAME_STMT
);
1041 return gimple_build_assign (f
.lhs
, VEC_COND_EXPR
, pred
, rhs
,
1042 build_zero_cst (vec_type
));
1049 expand (function_expander
&e
) const override
1051 if (e
.pred
== PRED_none
|| e
.pred
== PRED_x
)
1052 /* There's no benefit to using predicated instructions for _x here. */
1053 return e
.use_unpred_insn (e
.direct_optab_handler (vec_duplicate_optab
));
1055 /* Model predicated svdups as a SEL in which the "true" value is
1056 the duplicate of the function argument and the "false" value
1057 is the value of inactive lanes. */
1059 machine_mode mode
= e
.vector_mode (0);
1060 if (valid_for_const_vector_p (GET_MODE_INNER (mode
), e
.args
.last ()))
1061 /* Duplicate the constant to fill a vector. The pattern optimizes
1062 various cases involving constant operands, falling back to SEL
1064 icode
= code_for_vcond_mask (mode
, mode
);
1066 /* Use the pattern for selecting between a duplicated scalar
1067 variable and a vector fallback. */
1068 icode
= code_for_aarch64_sel_dup (mode
);
1069 return e
.use_vcond_mask_insn (icode
);
1073 class svdup_lane_impl
: public quiet
<function_base
>
1077 expand (function_expander
&e
) const override
1079 /* The native DUP lane has an index range of 64 bytes. */
1080 machine_mode mode
= e
.vector_mode (0);
1081 if (CONST_INT_P (e
.args
[1])
1082 && IN_RANGE (INTVAL (e
.args
[1]) * GET_MODE_UNIT_SIZE (mode
), 0, 63))
1083 return e
.use_exact_insn (code_for_aarch64_sve_dup_lane (mode
));
1085 /* Treat svdup_lane as if it were svtbl_n. */
1086 return e
.use_exact_insn (code_for_aarch64_sve (UNSPEC_TBL
,
1087 e
.vector_mode (0)));
1091 class svdupq_impl
: public quiet
<function_base
>
1095 fold_nonconst_dupq (gimple_folder
&f
) const
    /* Lower lhs = svdupq (arg0, arg1, ..., argN) into:
	 tmp = {arg0, arg1, ..., argN}
	 lhs = VEC_PERM_EXPR (tmp, tmp, {0, 1, ..., N, 0, 1, ..., N, ...})  */
1101 if (f
.type_suffix (0).bool_p
1102 || BYTES_BIG_ENDIAN
)
1105 tree lhs
= gimple_call_lhs (f
.call
);
1106 tree lhs_type
= TREE_TYPE (lhs
);
1107 tree elt_type
= TREE_TYPE (lhs_type
);
1108 scalar_mode elt_mode
= SCALAR_TYPE_MODE (elt_type
);
1109 machine_mode vq_mode
= aarch64_v128_mode (elt_mode
).require ();
1110 tree vq_type
= build_vector_type_for_mode (elt_type
, vq_mode
);
1112 unsigned nargs
= gimple_call_num_args (f
.call
);
1113 vec
<constructor_elt
, va_gc
> *v
;
1114 vec_alloc (v
, nargs
);
1115 for (unsigned i
= 0; i
< nargs
; i
++)
1116 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, gimple_call_arg (f
.call
, i
));
1117 tree vec
= build_constructor (vq_type
, v
);
1118 tree tmp
= make_ssa_name_fn (cfun
, vq_type
, 0);
1119 gimple
*g
= gimple_build_assign (tmp
, vec
);
1121 gimple_seq stmts
= NULL
;
1122 gimple_seq_add_stmt_without_update (&stmts
, g
);
1124 poly_uint64 lhs_len
= TYPE_VECTOR_SUBPARTS (lhs_type
);
1125 vec_perm_builder
sel (lhs_len
, nargs
, 1);
1126 for (unsigned i
= 0; i
< nargs
; i
++)
1129 vec_perm_indices
indices (sel
, 1, nargs
);
1130 tree mask_type
= build_vector_type (ssizetype
, lhs_len
);
1131 tree mask
= vec_perm_indices_to_tree (mask_type
, indices
);
1133 gimple
*g2
= gimple_build_assign (lhs
, VEC_PERM_EXPR
, tmp
, tmp
, mask
);
1134 gimple_seq_add_stmt_without_update (&stmts
, g2
);
1135 gsi_replace_with_seq (f
.gsi
, stmts
, false);
1141 fold (gimple_folder
&f
) const override
1143 tree vec_type
= TREE_TYPE (f
.lhs
);
1144 unsigned int nargs
= gimple_call_num_args (f
.call
);
1145 /* For predicates, pad out each argument so that we have one element
1147 unsigned int factor
= (f
.type_suffix (0).bool_p
1148 ? f
.type_suffix (0).element_bytes
: 1);
1149 tree_vector_builder
builder (vec_type
, nargs
* factor
, 1);
1150 for (unsigned int i
= 0; i
< nargs
; ++i
)
1152 tree elt
= gimple_call_arg (f
.call
, i
);
1153 if (!CONSTANT_CLASS_P (elt
))
1154 return fold_nonconst_dupq (f
);
1155 builder
.quick_push (elt
);
1156 for (unsigned int j
= 1; j
< factor
; ++j
)
1157 builder
.quick_push (build_zero_cst (TREE_TYPE (vec_type
)));
1159 return gimple_build_assign (f
.lhs
, builder
.build ());
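
  /* Informal illustration: for predicate results each boolean argument is
     padded with element_bytes - 1 zeros so that there is one VNx16BI lane
     per byte, e.g. svdupq_b32 (a, b, c, d) builds the repeating constant
     {a, 0, 0, 0, b, 0, 0, 0, c, 0, 0, 0, d, 0, 0, 0}.  */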
1163 expand (function_expander
&e
) const override
1165 machine_mode mode
= e
.vector_mode (0);
1166 unsigned int elements_per_vq
= e
.args
.length ();
1167 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_BOOL
)
1169 /* Construct a vector of integers so that we can compare them against
1170 zero below. Zero vs. nonzero is the only distinction that
1172 mode
= aarch64_sve_int_mode (mode
);
1173 for (unsigned int i
= 0; i
< elements_per_vq
; ++i
)
1174 e
.args
[i
] = simplify_gen_unary (ZERO_EXTEND
, GET_MODE_INNER (mode
),
1178 /* Get the 128-bit Advanced SIMD vector for this data size. */
1179 scalar_mode element_mode
= GET_MODE_INNER (mode
);
1180 machine_mode vq_mode
= aarch64_v128_mode (element_mode
).require ();
1181 gcc_assert (known_eq (elements_per_vq
, GET_MODE_NUNITS (vq_mode
)));
1183 /* Put the arguments into a 128-bit Advanced SIMD vector. We want
1184 argument N to go into architectural lane N, whereas Advanced SIMD
1185 vectors are loaded memory lsb to register lsb. We therefore need
1186 to reverse the elements for big-endian targets. */
1187 rtx vq_reg
= gen_reg_rtx (vq_mode
);
1188 rtvec vec
= rtvec_alloc (elements_per_vq
);
1189 for (unsigned int i
= 0; i
< elements_per_vq
; ++i
)
1191 unsigned int argno
= BYTES_BIG_ENDIAN
? elements_per_vq
- i
- 1 : i
;
1192 RTVEC_ELT (vec
, i
) = e
.args
[argno
];
1194 aarch64_expand_vector_init (vq_reg
, gen_rtx_PARALLEL (vq_mode
, vec
));
1196 /* If the result is a boolean, compare the data vector against zero. */
1197 if (mode
!= e
.vector_mode (0))
1199 rtx data_dupq
= aarch64_expand_sve_dupq (NULL
, mode
, vq_reg
);
1200 return aarch64_convert_sve_data_to_pred (e
.possible_target
,
1201 e
.vector_mode (0), data_dupq
);
1204 return aarch64_expand_sve_dupq (e
.possible_target
, mode
, vq_reg
);
1208 class svdupq_lane_impl
: public quiet
<function_base
>
1212 expand (function_expander
&e
) const override
1214 machine_mode mode
= e
.vector_mode (0);
1215 rtx index
= e
.args
[1];
1216 if (CONST_INT_P (index
) && IN_RANGE (INTVAL (index
), 0, 3))
1218 /* Use the .Q form of DUP, which is the native instruction for
1220 insn_code icode
= code_for_aarch64_sve_dupq_lane (mode
);
1221 unsigned int num_indices
= e
.elements_per_vq (0);
1222 rtx indices
= aarch64_gen_stepped_int_parallel
1223 (num_indices
, INTVAL (index
) * num_indices
, 1);
1225 e
.add_output_operand (icode
);
1226 e
.add_input_operand (icode
, e
.args
[0]);
1227 e
.add_fixed_operand (indices
);
1228 return e
.generate_insn (icode
);
1231 /* Build a .D TBL index for the pairs of doublewords that we want to
1233 if (CONST_INT_P (index
))
1235 /* The index vector is a constant. */
1236 rtx_vector_builder
builder (VNx2DImode
, 2, 1);
1237 builder
.quick_push (gen_int_mode (INTVAL (index
) * 2, DImode
));
1238 builder
.quick_push (gen_int_mode (INTVAL (index
) * 2 + 1, DImode
));
1239 index
= builder
.build ();
1243 /* Duplicate INDEX * 2 to fill a DImode vector. The ACLE spec
1244 explicitly allows the top of the index to be dropped. */
1245 index
= force_reg (DImode
, simplify_gen_binary (ASHIFT
, DImode
,
1246 index
, const1_rtx
));
1247 index
= expand_vector_broadcast (VNx2DImode
, index
);
1249 /* Get an alternating 0, 1 predicate. */
1250 rtx_vector_builder
builder (VNx2BImode
, 2, 1);
1251 builder
.quick_push (const0_rtx
);
1252 builder
.quick_push (constm1_rtx
);
1253 rtx pg
= force_reg (VNx2BImode
, builder
.build ());
1255 /* Add one to the odd elements of the index. */
1256 rtx one
= force_reg (VNx2DImode
, CONST1_RTX (VNx2DImode
));
1257 rtx target
= gen_reg_rtx (VNx2DImode
);
1258 emit_insn (gen_cond_addvnx2di (target
, pg
, index
, one
, index
));
1262 e
.args
[0] = gen_lowpart (VNx2DImode
, e
.args
[0]);
1264 return e
.use_exact_insn (CODE_FOR_aarch64_sve_tblvnx2di
);
1268 class sveorv_impl
: public reduction
1271 CONSTEXPR
sveorv_impl () : reduction (UNSPEC_XORV
) {}
1274 fold (gimple_folder
&f
) const override
1276 if (is_pfalse (gimple_call_arg (f
.call
, 0)))
1277 return f
.fold_call_to (build_zero_cst (TREE_TYPE (f
.lhs
)));
1282 /* Implements svextb, svexth and svextw. */
1283 class svext_bhw_impl
: public function_base
1286 CONSTEXPR
svext_bhw_impl (scalar_int_mode from_mode
)
1287 : m_from_mode (from_mode
) {}
1290 expand (function_expander
&e
) const override
1292 if (e
.type_suffix (0).unsigned_p
)
1294 /* Convert to an AND. The widest we go is 0xffffffff, which fits
1296 e
.args
.quick_push (GEN_INT (GET_MODE_MASK (m_from_mode
)));
1297 if (e
.pred
== PRED_m
)
1298 /* We now have arguments "(inactive, pg, op, mask)". Convert this
1299 to "(pg, op, mask, inactive)" so that the order matches svand_m
1300 with an extra argument on the end. Take the inactive elements
1301 from this extra argument. */
1302 e
.rotate_inputs_left (0, 4);
1303 return e
.map_to_rtx_codes (AND
, AND
, -1, -1, 3);
1306 machine_mode wide_mode
= e
.vector_mode (0);
1307 poly_uint64 nunits
= GET_MODE_NUNITS (wide_mode
);
1308 machine_mode narrow_mode
1309 = aarch64_sve_data_mode (m_from_mode
, nunits
).require ();
1310 if (e
.pred
== PRED_x
)
1312 insn_code icode
= code_for_aarch64_pred_sxt (wide_mode
, narrow_mode
);
1313 return e
.use_pred_x_insn (icode
);
1316 insn_code icode
= code_for_aarch64_cond_sxt (wide_mode
, narrow_mode
);
1317 return e
.use_cond_insn (icode
);
1320 /* The element mode that we're extending from. */
1321 scalar_int_mode m_from_mode
;
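
/* Informal illustration: for unsigned extensions the operation above is a
   simple mask, e.g. svextb_x (pg, x) on svuint32_t becomes x & 0xff and is
   expanded through the AND patterns, while signed extensions use the
   SXT[BHW] patterns in either predicated or conditional form.  */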
1324 /* Implements svget2, svget3 and svget4. */
1325 class svget_impl
: public quiet
<multi_vector_function
>
1328 using quiet
<multi_vector_function
>::quiet
;
1331 fold (gimple_folder
&f
) const override
1333 /* Fold into a normal gimple component access. */
1334 tree rhs_tuple
= gimple_call_arg (f
.call
, 0);
1335 tree index
= gimple_call_arg (f
.call
, 1);
1336 tree field
= tuple_type_field (TREE_TYPE (rhs_tuple
));
1337 tree rhs_array
= build3 (COMPONENT_REF
, TREE_TYPE (field
),
1338 rhs_tuple
, field
, NULL_TREE
);
1339 tree rhs_vector
= build4 (ARRAY_REF
, TREE_TYPE (f
.lhs
),
1340 rhs_array
, index
, NULL_TREE
, NULL_TREE
);
1341 return gimple_build_assign (f
.lhs
, rhs_vector
);
1345 expand (function_expander
&e
) const override
1347 /* Fold the access into a subreg rvalue. */
1348 return force_subreg (e
.vector_mode (0), e
.args
[0], GET_MODE (e
.args
[0]),
1349 INTVAL (e
.args
[1]) * BYTES_PER_SVE_VECTOR
);
1353 class svget_neonq_impl
: public function_base
1357 fold (gimple_folder
&f
) const override
1359 if (BYTES_BIG_ENDIAN
)
1361 tree rhs_sve_vector
= gimple_call_arg (f
.call
, 0);
1362 tree rhs_vector
= build3 (BIT_FIELD_REF
, TREE_TYPE (f
.lhs
),
1363 rhs_sve_vector
, bitsize_int (128), bitsize_int (0));
1364 return gimple_build_assign (f
.lhs
, rhs_vector
);
1368 expand (function_expander
&e
) const override
1370 if (BYTES_BIG_ENDIAN
)
1372 machine_mode mode
= e
.vector_mode (0);
1373 insn_code icode
= code_for_aarch64_sve_get_neonq (mode
);
1374 unsigned int nunits
= 128 / GET_MODE_UNIT_BITSIZE (mode
);
1375 rtx indices
= aarch64_gen_stepped_int_parallel
1376 (nunits
, nunits
- 1, -1);
1378 e
.add_output_operand (icode
);
1379 e
.add_input_operand (icode
, e
.args
[0]);
1380 e
.add_fixed_operand (indices
);
1381 return e
.generate_insn (icode
);
1383 return force_subreg (e
.result_mode (), e
.args
[0], GET_MODE (e
.args
[0]), 0);
1387 class svset_neonq_impl
: public function_base
1391 expand (function_expander
&e
) const override
1393 machine_mode mode
= e
.vector_mode (0);
1395 /* If the SVE argument is undefined, we just need to reinterpret the
1396 Advanced SIMD argument as an SVE vector. */
1397 if (!BYTES_BIG_ENDIAN
1398 && is_undef (CALL_EXPR_ARG (e
.call_expr
, 0)))
1399 return force_subreg (mode
, e
.args
[1], GET_MODE (e
.args
[1]), 0);
1401 rtx_vector_builder
builder (VNx16BImode
, 16, 2);
1402 for (unsigned int i
= 0; i
< 16; i
++)
1403 builder
.quick_push (CONST1_RTX (BImode
));
1404 for (unsigned int i
= 0; i
< 16; i
++)
1405 builder
.quick_push (CONST0_RTX (BImode
));
1406 e
.args
.quick_push (builder
.build ());
1407 if (BYTES_BIG_ENDIAN
)
1408 return e
.use_exact_insn (code_for_aarch64_sve_set_neonq (mode
));
1409 insn_code icode
= code_for_vcond_mask (mode
, mode
);
1410 e
.args
[1] = force_lowpart_subreg (mode
, e
.args
[1], GET_MODE (e
.args
[1]));
1411 e
.add_output_operand (icode
);
1412 e
.add_input_operand (icode
, e
.args
[1]);
1413 e
.add_input_operand (icode
, e
.args
[0]);
1414 e
.add_input_operand (icode
, e
.args
[2]);
1415 return e
.generate_insn (icode
);
1419 class svdup_neonq_impl
: public function_base
1423 fold (gimple_folder
&f
) const override
1425 if (BYTES_BIG_ENDIAN
)
1427 tree rhs_vector
= gimple_call_arg (f
.call
, 0);
1428 unsigned HOST_WIDE_INT neon_nelts
1429 = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs_vector
)).to_constant ();
1430 poly_uint64 sve_nelts
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (f
.lhs
));
1431 vec_perm_builder
builder (sve_nelts
, neon_nelts
, 1);
1432 for (unsigned int i
= 0; i
< neon_nelts
; i
++)
1433 builder
.quick_push (i
);
1434 vec_perm_indices
indices (builder
, 1, neon_nelts
);
1435 tree perm_type
= build_vector_type (ssizetype
, sve_nelts
);
1436 return gimple_build_assign (f
.lhs
, VEC_PERM_EXPR
,
1439 vec_perm_indices_to_tree (perm_type
, indices
));
1443 expand (function_expander
&e
) const override
1445 machine_mode mode
= e
.vector_mode (0);
1446 if (BYTES_BIG_ENDIAN
)
1448 insn_code icode
= code_for_aarch64_vec_duplicate_vq_be (mode
);
1449 unsigned int nunits
= 128 / GET_MODE_UNIT_BITSIZE (mode
);
1450 rtx indices
= aarch64_gen_stepped_int_parallel
1451 (nunits
, nunits
- 1, -1);
1453 e
.add_output_operand (icode
);
1454 e
.add_input_operand (icode
, e
.args
[0]);
1455 e
.add_fixed_operand (indices
);
1456 return e
.generate_insn (icode
);
1458 insn_code icode
= code_for_aarch64_vec_duplicate_vq_le (mode
);
1459 e
.add_output_operand (icode
);
1460 e
.add_input_operand (icode
, e
.args
[0]);
1461 return e
.generate_insn (icode
);
1465 class svindex_impl
: public function_base
1469 fold (gimple_folder
&f
) const override
1471 /* Apply constant folding if base and step are integer constants. */
1472 tree vec_type
= TREE_TYPE (f
.lhs
);
1473 tree base
= gimple_call_arg (f
.call
, 0);
1474 tree step
= gimple_call_arg (f
.call
, 1);
1475 if (TREE_CODE (base
) != INTEGER_CST
|| TREE_CODE (step
) != INTEGER_CST
)
1477 return gimple_build_assign (f
.lhs
,
1478 build_vec_series (vec_type
, base
, step
));
1483 expand (function_expander
&e
) const override
1485 return e
.use_exact_insn (e
.direct_optab_handler (vec_series_optab
));
1489 class svinsr_impl
: public quiet
<function_base
>
1493 fold (gimple_folder
&f
) const override
1495 gcall
*new_call
= gimple_build_call_internal (IFN_VEC_SHL_INSERT
, 2,
1496 gimple_call_arg (f
.call
, 0),
1497 gimple_call_arg (f
.call
, 1));
1498 gimple_call_set_lhs (new_call
, f
.lhs
);
1503 expand (function_expander
&e
) const override
1505 insn_code icode
= direct_optab_handler (vec_shl_insert_optab
,
1507 return e
.use_exact_insn (icode
);
1511 /* Implements svlasta and svlastb. */
1512 class svlast_impl
: public quiet
<function_base
>
1515 CONSTEXPR
svlast_impl (int unspec
) : m_unspec (unspec
) {}
1517 bool is_lasta () const { return m_unspec
== UNSPEC_LASTA
; }
1518 bool is_lastb () const { return m_unspec
== UNSPEC_LASTB
; }
1520 /* Fold a svlast{a/b} call with constant predicate to a BIT_FIELD_REF.
1521 BIT_FIELD_REF lowers to Advanced SIMD element extract, so we have to
1522 ensure the index of the element being accessed is in the range of a
1523 Advanced SIMD vector width. */
1525 fold (gimple_folder
& f
) const override
1527 tree pred
= gimple_call_arg (f
.call
, 0);
1528 tree val
= gimple_call_arg (f
.call
, 1);
1530 if (TREE_CODE (pred
) == VECTOR_CST
)
1534 int step
= f
.type_suffix (0).element_bytes
;
1535 int step_1
= gcd (step
, VECTOR_CST_NPATTERNS (pred
));
1536 int npats
= VECTOR_CST_NPATTERNS (pred
);
1537 unsigned enelts
= vector_cst_encoded_nelts (pred
);
1539 unsigned HOST_WIDE_INT nelts
;
1541 /* We can optimize 2 cases common to variable and fixed-length cases
1542 without a linear search of the predicate vector:
1543 1. LASTA if predicate is all true, return element 0.
1544 2. LASTA if predicate all false, return element 0. */
1545 if (is_lasta () && vector_cst_all_same (pred
, step_1
))
1547 b
= build3 (BIT_FIELD_REF
, TREE_TYPE (f
.lhs
), val
,
1548 bitsize_int (step
* BITS_PER_UNIT
), bitsize_int (0));
1549 return gimple_build_assign (f
.lhs
, b
);
1552 /* Handle the all-false case for LASTB where SVE VL == 128b -
1553 return the highest numbered element. */
1554 if (is_lastb () && known_eq (BYTES_PER_SVE_VECTOR
, 16)
1555 && vector_cst_all_same (pred
, step_1
)
1556 && integer_zerop (VECTOR_CST_ENCODED_ELT (pred
, 0)))
1558 b
= build3 (BIT_FIELD_REF
, TREE_TYPE (f
.lhs
), val
,
1559 bitsize_int (step
* BITS_PER_UNIT
),
1560 bitsize_int ((16 - step
) * BITS_PER_UNIT
));
1562 return gimple_build_assign (f
.lhs
, b
);
1565 /* Determine if there are any repeating non-zero elements in variable
1567 if (!VECTOR_CST_NELTS (pred
).is_constant (&nelts
))
1569 /* If VECTOR_CST_NELTS_PER_PATTERN (pred) == 2 and every multiple of
1571 [VECTOR_CST_NPATTERNS .. VECTOR_CST_ENCODED_NELTS - 1]
1572 is zero, then we can treat the vector as VECTOR_CST_NPATTERNS
1573 elements followed by all inactive elements. */
1574 if (VECTOR_CST_NELTS_PER_PATTERN (pred
) == 2)
1576 /* Restrict the scope of search to NPATS if vector is
1577 variable-length for linear search later. */
1579 for (unsigned j
= npats
; j
< enelts
; j
+= step_1
)
1581 /* If there are active elements in the repeated pattern of a
1582 variable-length vector, then return NULL as there is no
1583 way to be sure statically if this falls within the
1584 Advanced SIMD range. */
1585 if (!integer_zerop (VECTOR_CST_ENCODED_ELT (pred
, j
)))
1590 /* If we're here, it means that for NELTS_PER_PATTERN != 2, there
1591 is a repeating non-zero element. */
1595 /* If we're here, it means either:
1596 1. The vector is variable-length and there's no active element in the
1597 repeated part of the pattern, or
1598 2. The vector is fixed-length.
1600 Fall through to finding the last active element linearly for
1601 for all cases where the last active element is known to be
1602 within a statically-determinable range. */
1603 i
= MAX ((int)nelts
- step
, 0);
1604 for (; i
>= 0; i
-= step
)
1605 if (!integer_zerop (VECTOR_CST_ELT (pred
, i
)))
1610 /* For LASTB, the element is the last active element. */
1615 /* For LASTA, the element is one after last active element. */
1618 /* If last active element is
1619 last element, wrap-around and return first Advanced SIMD
1621 if (known_ge (pos
, BYTES_PER_SVE_VECTOR
))
1625 /* Out of Advanced SIMD range. */
1626 if (pos
< 0 || pos
> 15)
1629 b
= build3 (BIT_FIELD_REF
, TREE_TYPE (f
.lhs
), val
,
1630 bitsize_int (step
* BITS_PER_UNIT
),
1631 bitsize_int (pos
* BITS_PER_UNIT
));
1633 return gimple_build_assign (f
.lhs
, b
);
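
    /* Informal illustration: with a constant predicate the extract can be
       folded to a BIT_FIELD_REF, which lowers to an Advanced SIMD element
       extract; e.g. on a 128-bit vector, svlastb (svptrue_b32 (), x) folds
       to the 32 bits at bit offset 96, i.e. element 3.  Anything whose
       index cannot be proven to lie within the low 128 bits is left to the
       LASTA/LASTB expansion below.  */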
1639 expand (function_expander
&e
) const override
1641 return e
.use_exact_insn (code_for_extract (m_unspec
, e
.vector_mode (0)));
1644 /* The unspec code associated with the operation. */
1648 class svld1_impl
: public full_width_access
1652 call_properties (const function_instance
&) const override
1654 return CP_READ_MEMORY
;
1658 fold (gimple_folder
&f
) const override
1660 if (f
.vectors_per_tuple () != 1)
1663 tree vectype
= f
.vector_type (0);
1665 /* Get the predicate and base pointer. */
1666 gimple_seq stmts
= NULL
;
1667 tree pred
= f
.convert_pred (stmts
, vectype
, 0);
1668 tree base
= f
.fold_contiguous_base (stmts
, vectype
);
1669 tree els
= build_zero_cst (vectype
);
1670 gsi_insert_seq_before (f
.gsi
, stmts
, GSI_SAME_STMT
);
1672 tree cookie
= f
.load_store_cookie (TREE_TYPE (vectype
));
1673 gcall
*new_call
= gimple_build_call_internal (IFN_MASK_LOAD
, 4,
1674 base
, cookie
, pred
, els
);
1675 gimple_call_set_lhs (new_call
, f
.lhs
);
1680 expand (function_expander
&e
) const override
1683 if (e
.vectors_per_tuple () == 1)
1684 icode
= convert_optab_handler (maskload_optab
,
1685 e
.vector_mode (0), e
.gp_mode (0));
1687 icode
= code_for_aarch64 (UNSPEC_LD1_COUNT
, e
.tuple_mode (0));
1688 return e
.use_contiguous_load_insn (icode
, true);
1692 /* Implements extending contiguous forms of svld1. */
1693 class svld1_extend_impl
: public extending_load
1696 using extending_load::extending_load
;
1699 expand (function_expander
&e
) const override
1701 insn_code icode
= code_for_aarch64_load (extend_rtx_code (),
1703 e
.memory_vector_mode ());
1704 return e
.use_contiguous_load_insn (icode
, true);
1708 class svld1_gather_impl
: public full_width_access
1712 call_properties (const function_instance
&) const override
1714 return CP_READ_MEMORY
;
1718 expand (function_expander
&e
) const override
1720 e
.prepare_gather_address_operands (1);
1721 /* Put the predicate last, as required by mask_gather_load_optab. */
1722 e
.rotate_inputs_left (0, 5);
1723 /* Add the else operand. */
1724 e
.args
.quick_push (CONST0_RTX (e
.vector_mode (0)));
1725 machine_mode mem_mode
= e
.memory_vector_mode ();
1726 machine_mode int_mode
= aarch64_sve_int_mode (mem_mode
);
1727 insn_code icode
= convert_optab_handler (mask_gather_load_optab
,
1728 mem_mode
, int_mode
);
1729 return e
.use_exact_insn (icode
);
1733 /* Implements extending forms of svld1_gather. */
1734 class svld1_gather_extend_impl
: public extending_load
1737 using extending_load::extending_load
;
1740 expand (function_expander
&e
) const override
1742 e
.prepare_gather_address_operands (1);
1743 /* Put the predicate last, since the extending gathers use the same
1744 operand order as mask_gather_load_optab. */
1745 e
.rotate_inputs_left (0, 5);
1746 /* Add a constant predicate for the extension rtx. */
1747 e
.args
.quick_push (CONSTM1_RTX (VNx16BImode
));
1748 /* Add the else operand. */
1749 e
.args
.quick_push (CONST0_RTX (e
.vector_mode (1)));
1750 insn_code icode
= code_for_aarch64_gather_load (extend_rtx_code (),
1752 e
.memory_vector_mode ());
1753 return e
.use_exact_insn (icode
);
1757 class load_replicate
: public function_base
1761 call_properties (const function_instance
&) const override
1763 return CP_READ_MEMORY
;
1767 memory_scalar_type (const function_instance
&fi
) const override
1769 return fi
.scalar_type (0);
1773 class svld1rq_impl
: public load_replicate
1777 memory_vector_mode (const function_instance
&fi
) const override
1779 return aarch64_v128_mode (GET_MODE_INNER (fi
.vector_mode (0))).require ();
1783 expand (function_expander
&e
) const override
1785 insn_code icode
= code_for_aarch64_sve_ld1rq (e
.vector_mode (0));
1786 return e
.use_contiguous_load_insn (icode
);
1790 fold (gimple_folder
&f
) const override
1792 tree arg0
= gimple_call_arg (f
.call
, 0);
1793 tree arg1
= gimple_call_arg (f
.call
, 1);
    /* Transform:
	 lhs = svld1rq ({-1, -1, ...}, arg1)
       into:
	 tmp = mem_ref<vectype> [(elem * {ref-all}) arg1]
	 lhs = vec_perm_expr<tmp, tmp, {0, 1, 2, 3, ...}>
       on a little-endian target, where vectype is the corresponding
       Advanced SIMD type.  */
1803 if (!BYTES_BIG_ENDIAN
1804 && integer_all_onesp (arg0
)
1805 && !flag_non_call_exceptions
)
1807 tree lhs
= gimple_call_lhs (f
.call
);
1808 tree lhs_type
= TREE_TYPE (lhs
);
1809 poly_uint64 lhs_len
= TYPE_VECTOR_SUBPARTS (lhs_type
);
1810 tree eltype
= TREE_TYPE (lhs_type
);
1812 scalar_mode elmode
= GET_MODE_INNER (TYPE_MODE (lhs_type
));
1813 machine_mode vq_mode
= aarch64_v128_mode (elmode
).require ();
1814 tree vectype
= build_vector_type_for_mode (eltype
, vq_mode
);
1817 = build_pointer_type_for_mode (eltype
, VOIDmode
, true);
1818 tree zero
= build_zero_cst (elt_ptr_type
);
1820 /* Use element type alignment. */
1822 = build_aligned_type (vectype
, TYPE_ALIGN (eltype
));
1824 tree mem_ref_lhs
= make_ssa_name_fn (cfun
, access_type
, 0);
1825 tree mem_ref_op
= fold_build2 (MEM_REF
, access_type
, arg1
, zero
);
1826 gimple
*mem_ref_stmt
1827 = gimple_build_assign (mem_ref_lhs
, mem_ref_op
);
1829 gimple_seq stmts
= NULL
;
1830 gimple_seq_add_stmt_without_update (&stmts
, mem_ref_stmt
);
1832 int source_nelts
= TYPE_VECTOR_SUBPARTS (access_type
).to_constant ();
1833 vec_perm_builder
sel (lhs_len
, source_nelts
, 1);
1834 for (int i
= 0; i
< source_nelts
; i
++)
1837 vec_perm_indices
indices (sel
, 1, source_nelts
);
1838 gcc_checking_assert (can_vec_perm_const_p (TYPE_MODE (lhs_type
),
1839 TYPE_MODE (access_type
),
1841 tree mask_type
= build_vector_type (ssizetype
, lhs_len
);
1842 tree mask
= vec_perm_indices_to_tree (mask_type
, indices
);
1843 gimple
*g2
= gimple_build_assign (lhs
, VEC_PERM_EXPR
,
1844 mem_ref_lhs
, mem_ref_lhs
, mask
);
1845 gimple_seq_add_stmt_without_update (&stmts
, g2
);
1846 gsi_replace_with_seq_vops (f
.gsi
, stmts
);
1854 class svld1ro_impl
: public load_replicate
1858 memory_vector_mode (const function_instance
&) const override
1864 expand (function_expander
&e
) const override
1866 insn_code icode
= code_for_aarch64_sve_ld1ro (e
.vector_mode (0));
1867 return e
.use_contiguous_load_insn (icode
);
1871 /* Implements svld2, svld3 and svld4. */
1872 class svld234_impl
: public full_width_access
1875 using full_width_access::full_width_access
;
1878 call_properties (const function_instance
&) const override
1880 return CP_READ_MEMORY
;
1884 fold (gimple_folder
&f
) const override
1886 tree tuple_type
= TREE_TYPE (f
.lhs
);
1887 tree vectype
= f
.vector_type (0);
1889 /* Get the predicate and base pointer. */
1890 gimple_seq stmts
= NULL
;
1891 tree pred
= f
.convert_pred (stmts
, vectype
, 0);
1892 tree els
= build_zero_cst (vectype
);
1893 tree base
= f
.fold_contiguous_base (stmts
, vectype
);
1894 gsi_insert_seq_before (f
.gsi
, stmts
, GSI_SAME_STMT
);
1896 /* Emit two statements: a clobber of the lhs, so that it isn't
1897 upwards exposed, and then the load itself.
1899 The fold routines expect the replacement statement to have the
1900 same lhs as the original call, so return the clobber statement
1901 rather than the load. */
1902 gimple
*clobber
= gimple_build_assign (f
.lhs
, build_clobber (tuple_type
));
1904 /* View the loaded data as an array of vectors. */
1905 tree field
= tuple_type_field (tuple_type
);
1906 tree lhs_array
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (field
),
1907 unshare_expr (f
.lhs
));
1909 /* Emit the load itself. */
1910 tree cookie
= f
.load_store_cookie (TREE_TYPE (vectype
));
1911 gcall
*new_call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 4,
1912 base
, cookie
, pred
, els
);
1913 gimple_call_set_lhs (new_call
, lhs_array
);
1914 gsi_insert_after (f
.gsi
, new_call
, GSI_SAME_STMT
);
1920 expand (function_expander
&e
) const override
1922 machine_mode tuple_mode
= e
.result_mode ();
1923 insn_code icode
= convert_optab_handler (vec_mask_load_lanes_optab
,
1924 tuple_mode
, e
.vector_mode (0));
1925 return e
.use_contiguous_load_insn (icode
, true);
class svldff1_gather_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    machine_mode mem_mode = e.memory_vector_mode ();
    return e.use_exact_insn (code_for_aarch64_ldff1_gather (mem_mode));
  }
};

/* Implements extending forms of svldff1_gather.  */
class svldff1_gather_extend : public extending_load
{
public:
  using extending_load::extending_load;

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since ldff1_gather uses the same operand
       order as mask_gather_load_optab.  */
    e.rotate_inputs_left (0, 5);
    /* Add a constant predicate for the extension rtx.  */
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    insn_code icode = code_for_aarch64_ldff1_gather (extend_rtx_code (),
						     e.vector_mode (0),
						     e.memory_vector_mode ());
    return e.use_exact_insn (icode);
  }
};

class svldnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = (e.vectors_per_tuple () == 1
		       ? code_for_aarch64_ldnt1 (e.vector_mode (0))
		       : code_for_aarch64 (UNSPEC_LDNT1_COUNT,
					   e.tuple_mode (0)));
    return e.use_contiguous_load_insn (icode, true);
  }
};

/* Implements svldff1 and svldnf1.  */
class svldxf1_impl : public full_width_access
{
public:
  CONSTEXPR svldxf1_impl (int unspec) : m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    machine_mode mode = e.vector_mode (0);
    return e.use_contiguous_load_insn (code_for_aarch64_ldf1 (m_unspec, mode));
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

/* Implements extending contiguous forms of svldff1 and svldnf1.  */
class svldxf1_extend_impl : public extending_load
{
public:
  CONSTEXPR svldxf1_extend_impl (type_suffix_index memory_type, int unspec)
    : extending_load (memory_type), m_unspec (unspec) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_MEMORY | CP_READ_FFR | CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_update_ffr_for_load ());

    insn_code icode = code_for_aarch64_ldf1 (m_unspec, extend_rtx_code (),
					     e.vector_mode (0),
					     e.memory_vector_mode ());
    return e.use_contiguous_load_insn (icode);
  }

  /* The unspec associated with the load.  */
  int m_unspec;
};

class svlen_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    /* The argument only exists for its type.  */
    tree rhs_type = TREE_TYPE (gimple_call_arg (f.call, 0));
    tree count = build_int_cstu (TREE_TYPE (f.lhs),
				 TYPE_VECTOR_SUBPARTS (rhs_type));
    return gimple_build_assign (f.lhs, count);
  }

  rtx
  expand (function_expander &e) const override
  {
    /* The argument only exists for its type.  */
    return gen_int_mode (GET_MODE_NUNITS (e.vector_mode (0)), DImode);
  }
};
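/* For illustration (example added for exposition): svlen folds to a
   compile-time element count, e.g. for an svfloat64_t argument it is the
   number of 64-bit lanes, which would be 4 if the runtime vector length
   were 256 bits.  */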
class svlsl_impl : public rtx_code_function
{
public:
  CONSTEXPR svlsl_impl ()
    : rtx_code_function (ASHIFT, ASHIFT) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    return f.fold_const_binary (LSHIFT_EXPR);
  }
};

class svmad_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return expand_mad (e);
  }
};
class svminv_impl : public reduction
{
public:
  CONSTEXPR svminv_impl ()
    : reduction (UNSPEC_SMINV, UNSPEC_UMINV, UNSPEC_FMINV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      {
	tree rhs = f.type_suffix (0).integer_p
		   ? TYPE_MAX_VALUE (TREE_TYPE (f.lhs))
		   : build_real (TREE_TYPE (f.lhs), dconstinf);
	return f.fold_call_to (rhs);
      }
    return NULL;
  }
};

class svmaxnmv_impl : public reduction
{
public:
  CONSTEXPR svmaxnmv_impl () : reduction (UNSPEC_FMAXNMV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      {
	REAL_VALUE_TYPE rnan = dconst0;
	rnan.cl = rvc_nan;
	return f.fold_call_to (build_real (TREE_TYPE (f.lhs), rnan));
      }
    return NULL;
  }
};

class svmaxv_impl : public reduction
{
public:
  CONSTEXPR svmaxv_impl ()
    : reduction (UNSPEC_SMAXV, UNSPEC_UMAXV, UNSPEC_FMAXV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      {
	tree rhs = f.type_suffix (0).integer_p
		   ? TYPE_MIN_VALUE (TREE_TYPE (f.lhs))
		   : build_real (TREE_TYPE (f.lhs), dconstninf);
	return f.fold_call_to (rhs);
      }
    return NULL;
  }
};

class svminnmv_impl : public reduction
{
public:
  CONSTEXPR svminnmv_impl () : reduction (UNSPEC_FMINNMV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      {
	REAL_VALUE_TYPE rnan = dconst0;
	rnan.cl = rvc_nan;
	return f.fold_call_to (build_real (TREE_TYPE (f.lhs), rnan));
      }
    return NULL;
  }
};
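/* For illustration (example added for exposition): with an all-false
   governing predicate the reductions above fold to their identity values,
   e.g. svminv on signed 32-bit data folds to INT32_MAX, svmaxv to
   INT32_MIN, and the floating-point svmaxnmv/svminnmv forms fold to a
   default NaN.  */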
class svmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_mad (e, 3);
  }
};

class svmla_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix (0).integer_p)
      {
	machine_mode mode = e.vector_mode (0);
	return e.use_exact_insn (code_for_aarch64_sve_add_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLA);
  }
};

class svmls_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    /* Put the accumulator at the end (argument 3), but keep it as the
       merge input for _m functions.  */
    e.rotate_inputs_left (1, 4);
    return expand_msb (e, 3);
  }
};

class svmov_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    return gimple_build_assign (f.lhs, BIT_AND_EXPR,
				gimple_call_arg (f.call, 0),
				gimple_call_arg (f.call, 1));
  }

  rtx
  expand (function_expander &e) const override
  {
    /* The canonical form for the assembler alias "MOV Pa.B, Pb/Z, Pc.B"
       is "AND Pa.B, Pb/Z, Pc.B, Pc.B".  */
    gcc_assert (e.pred == PRED_z);
    e.args.quick_push (e.args[1]);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_andvnx16bi_z);
  }
};
class svmls_lane_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix (0).integer_p)
      {
	machine_mode mode = e.vector_mode (0);
	return e.use_exact_insn (code_for_aarch64_sve_sub_mul_lane (mode));
      }
    return expand_mla_mls_lane (e, UNSPEC_FMLS);
  }
};

class svmmla_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    insn_code icode;
    if (e.type_suffix (0).integer_p)
      {
	if (e.type_suffix (0).unsigned_p)
	  icode = code_for_aarch64_sve_add (UNSPEC_UMATMUL, e.vector_mode (0));
	else
	  icode = code_for_aarch64_sve_add (UNSPEC_SMATMUL, e.vector_mode (0));
      }
    else
      icode = code_for_aarch64_sve (UNSPEC_FMMLA, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

class svmsb_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    return expand_msb (e);
  }
};
class svmul_impl : public rtx_code_function
{
public:
  CONSTEXPR svmul_impl ()
    : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (auto *res = f.fold_const_binary (MULT_EXPR))
      return res;

    /* If one of the operands is all ones, fold to other operand.  */
    tree op1 = gimple_call_arg (f.call, 1);
    tree op2 = gimple_call_arg (f.call, 2);
    if (integer_onep (op1))
      return f.fold_active_lanes_to (op2);
    if (integer_onep (op2))
      return f.fold_active_lanes_to (op1);

    /* If one of the operands is all zeros, fold to zero vector.  */
    if (integer_zerop (op1) || integer_zerop (op2))
      return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));

    /* If one of the operands is all integer -1, fold to svneg.  */
    if (integer_minus_onep (op1) || integer_minus_onep (op2))
      {
	auto mul_by_m1 = [](gimple_folder &f, tree lhs_conv,
			    vec<tree> &args_conv) -> gimple *
	  {
	    gcc_assert (lhs_conv && args_conv.length () == 3);
	    tree pg = args_conv[0];
	    tree op1 = args_conv[1];
	    tree op2 = args_conv[2];
	    tree negated_op = op1;
	    if (integer_minus_onep (op1))
	      negated_op = op2;
	    type_suffix_pair signed_tsp =
	      {find_type_suffix (TYPE_signed, f.type_suffix (0).element_bits),
	       f.type_suffix_ids[1]};
	    function_instance instance ("svneg", functions::svneg,
					shapes::unary, MODE_none, signed_tsp,
					GROUP_none, f.pred, FPM_unused);
	    gcall *call = f.redirect_call (instance);
	    gimple_call_set_lhs (call, lhs_conv);
	    unsigned offset = 0;
	    if (f.pred == PRED_m)
	      {
		offset = 1;
		gimple_call_set_arg (call, 0, op1);
	      }
	    else
	      gimple_set_num_ops (call, 5);
	    gimple_call_set_arg (call, offset, pg);
	    gimple_call_set_arg (call, offset + 1, negated_op);
	    return call;
	  };
	tree ty =
	  get_vector_type (find_type_suffix (TYPE_signed,
					     f.type_suffix (0).element_bits));
	return f.convert_and_fold (ty, mul_by_m1);
      }

    /* If one of the operands is a uniform power of 2, fold to a left shift
       by immediate.  */
    tree pg = gimple_call_arg (f.call, 0);
    tree op1_cst = uniform_integer_cst_p (op1);
    tree op2_cst = uniform_integer_cst_p (op2);
    tree shift_op1, shift_op2 = NULL;
    if (op1_cst && integer_pow2p (op1_cst)
	&& (f.pred != PRED_m
	    || is_ptrue (pg, f.type_suffix (0).element_bytes)))
      {
	shift_op1 = op2;
	shift_op2 = op1_cst;
      }
    else if (op2_cst && integer_pow2p (op2_cst))
      {
	shift_op1 = op1;
	shift_op2 = op2_cst;
      }
    else
      return NULL;

    shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
				  tree_log2 (shift_op2));
    function_instance instance ("svlsl", functions::svlsl,
				shapes::binary_uint_opt_n, MODE_n,
				f.type_suffix_ids, GROUP_none, f.pred,
				FPM_unused);
    gcall *call = f.redirect_call (instance);
    gimple_call_set_arg (call, 1, shift_op1);
    gimple_call_set_arg (call, 2, shift_op2);
    return call;
  }
};
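/* For illustration, in ACLE terms (examples added for exposition):
   svmul_x (pg, x, 1) folds to X, svmul_x (pg, x, 0) to a zero vector,
   svmul_x (pg, x, -1) to svneg_x (pg, x), and svmul_x (pg, x, 4) to
   svlsl_x (pg, x, 2).  */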
class svnand_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_nandvnx16bi_z);
  }
};

class svnor_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_norvnx16bi_z);
  }
};

class svnot_impl : public rtx_code_function
{
public:
  CONSTEXPR svnot_impl () : rtx_code_function (NOT, NOT, -1) {}

  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix_ids[0] == TYPE_SUFFIX_b)
      {
	/* The canonical form for the assembler alias "NOT Pa.B, Pb/Z, Pc.B"
	   is "EOR Pa.B, Pb/Z, Pb.B, Pc.B".  */
	gcc_assert (e.pred == PRED_z);
	e.args.quick_insert (1, e.args[0]);
	return e.use_exact_insn (CODE_FOR_aarch64_pred_xorvnx16bi_z);
      }
    return rtx_code_function::expand (e);
  }
};

class svorn_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const override
  {
    gcc_assert (e.pred == PRED_z);
    return e.use_exact_insn (CODE_FOR_aarch64_pred_ornvnx16bi_z);
  }
};

class svorv_impl : public reduction
{
public:
  CONSTEXPR svorv_impl () : reduction (UNSPEC_IORV) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (build_zero_cst (TREE_TYPE (f.lhs)));
    return NULL;
  }
};

class svpfalse_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.type_suffix (0).tclass == TYPE_bool)
      return f.fold_to_pfalse ();
    return NULL;
  }

  rtx
  expand (function_expander &) const override
  {
    return CONST0_RTX (VNx16BImode);
  }
};
/* Implements svpfirst and svpnext, which share the same .md patterns.  */
class svpfirst_svpnext_impl : public function_base
{
public:
  CONSTEXPR svpfirst_svpnext_impl (int unspec) : m_unspec (unspec) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    tree pg = gimple_call_arg (f.call, 0);
    if (is_pfalse (pg))
      return f.fold_call_to (m_unspec == UNSPEC_PFIRST
			     ? gimple_call_arg (f.call, 1)
			     : pg);
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.vector_mode (0);
    e.add_ptrue_hint (0, mode);
    return e.use_exact_insn (code_for_aarch64_sve (m_unspec, mode));
  }

  /* The unspec associated with the operation.  */
  int m_unspec;
};

/* Implements contiguous forms of svprf[bhwd].  */
class svprf_bhwd_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_PREFETCH_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_prefetch_operands ();
    insn_code icode = code_for_aarch64_sve_prefetch (m_mode);
    return e.use_contiguous_prefetch_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};
/* Implements svprf[bhwd]_gather.  */
class svprf_bhwd_gather_impl : public function_base
{
public:
  CONSTEXPR svprf_bhwd_gather_impl (machine_mode mode) : m_mode (mode) {}

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_PREFETCH_MEMORY;
  }

  machine_mode
  memory_vector_mode (const function_instance &) const override
  {
    return m_mode;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_prefetch_operands ();
    e.prepare_gather_address_operands (1);

    /* Insert a zero operand to identify the mode of the memory being
       accessed.  This goes between the gather operands and prefetch
       operands created above.  */
    e.args.quick_insert (5, CONST0_RTX (m_mode));

    machine_mode reg_mode = GET_MODE (e.args[2]);
    insn_code icode = code_for_aarch64_sve_gather_prefetch (m_mode, reg_mode);
    return e.use_exact_insn (icode);
  }

  /* The mode that we'd use to hold one vector of prefetched data.  */
  machine_mode m_mode;
};
/* Implements svptest_any, svptest_first and svptest_last.  */
class svptest_impl : public function_base
{
public:
  CONSTEXPR svptest_impl (rtx_code compare) : m_compare (compare) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (boolean_false_node);
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See whether GP is an exact ptrue for some predicate mode;
       i.e. whether converting the GP to that mode will not drop
       set bits and will leave all significant bits set.  */
    machine_mode wide_mode;
    int hint;
    if (aarch64_ptrue_all_mode (e.args[0]).exists (&wide_mode))
      hint = SVE_KNOWN_PTRUE;
    else
      {
	hint = SVE_MAYBE_NOT_PTRUE;
	wide_mode = VNx16BImode;
      }

    /* Generate the PTEST itself.  */
    rtx pg = force_reg (VNx16BImode, e.args[0]);
    rtx wide_pg = gen_lowpart (wide_mode, pg);
    rtx hint_rtx = gen_int_mode (hint, DImode);
    rtx op = force_reg (wide_mode, gen_lowpart (wide_mode, e.args[1]));
    emit_insn (gen_aarch64_ptestvnx16bi (pg, wide_pg, hint_rtx, op));

    /* Get the location of the boolean result.  We can provide SImode and
       DImode values directly; rely on generic code to convert others.  */
    rtx target = e.possible_target;
    if (!target
	|| !REG_P (target)
	|| (GET_MODE (target) != SImode && GET_MODE (target) != DImode))
      target = gen_reg_rtx (DImode);

    /* Generate a CSET to convert the CC result of the PTEST to a boolean.  */
    rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    rtx compare = gen_rtx_fmt_ee (m_compare, GET_MODE (target),
				  cc_reg, const0_rtx);
    emit_insn (gen_rtx_SET (target, compare));
    return target;
  }

  /* The comparison code associated with ptest condition.  */
  rtx_code m_compare;
};
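/* For illustration (note added for exposition): svptest_any expands to a
   PTEST of the operand under the governing predicate followed by a CSET of
   the condition-code result, using NE; svptest_first and svptest_last use
   LT and LTU respectively (see the FUNCTION table below).  */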
class svptrue_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.type_suffix (0).tclass == TYPE_bool)
      return f.fold_to_ptrue ();
    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    if (e.type_suffix (0).tclass == TYPE_bool)
      return aarch64_ptrue_all (e.type_suffix (0).element_bytes);

    auto bits = e.type_suffix (0).element_bits;
    return e.use_exact_insn (code_for_aarch64_sve_ptrue_c (bits));
  }
};

class svptrue_pat_impl : public function_base
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    tree pattern_arg = gimple_call_arg (f.call, 0);
    aarch64_svpattern pattern
      = (aarch64_svpattern) tree_to_shwi (pattern_arg);

    if (pattern == AARCH64_SV_ALL)
      /* svptrue_pat_bN (SV_ALL) == svptrue_bN ().  */
      return f.fold_to_ptrue ();

    /* See whether we can count the number of elements in the pattern
       at compile time.  If so, construct a predicate with that number
       of 1s followed by all 0s.  */
    int nelts_per_vq = f.elements_per_vq (0);
    HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, nelts_per_vq);
    if (value >= 0)
      return f.fold_to_vl_pred (value);

    return NULL;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* In rtl, the predicate is represented as the constant:

	 (const:V16BI (unspec:V16BI [(const_int PATTERN)
				     (const_vector:VnnBI [zeros])]
				    UNSPEC_PTRUE))

       where nn determines the element size.  */
    rtvec vec = gen_rtvec (2, e.args[0], CONST0_RTX (e.vector_mode (0)));
    return gen_rtx_CONST (VNx16BImode,
			  gen_rtx_UNSPEC (VNx16BImode, vec, UNSPEC_PTRUE));
  }
};
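/* For illustration (example added for exposition): the fold above can
   resolve svptrue_pat_b32 (SV_VL4) at compile time, since four 32-bit
   elements always fit in the minimum vector length; the result is a
   constant predicate with the first four .S lanes set.  */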
/* Implements svqdec[bhwd]{,_pat} and svqinc[bhwd]{,_pat}.  */
class svqdec_svqinc_bhwd_impl : public function_base
{
public:
  CONSTEXPR svqdec_svqinc_bhwd_impl (rtx_code code_for_sint,
				     rtx_code code_for_uint,
				     scalar_int_mode elem_mode)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint),
      m_elem_mode (elem_mode)
  {}

  rtx
  expand (function_expander &e) const override
  {
    /* Treat non-_pat functions in the same way as _pat functions with
       an SV_ALL argument.  */
    if (e.args.length () == 2)
      e.args.quick_insert (1, gen_int_mode (AARCH64_SV_ALL, DImode));

    /* Insert the number of elements per 128-bit block as a fake argument,
       between the pattern and the multiplier.  Arguments 1, 2 and 3 then
       correspond exactly with the 3 UNSPEC_SVE_CNT_PAT operands; see
       aarch64_sve_cnt_pat for details.  */
    unsigned int elements_per_vq = 128 / GET_MODE_BITSIZE (m_elem_mode);
    e.args.quick_insert (2, gen_int_mode (elements_per_vq, DImode));

    rtx_code code = (e.type_suffix (0).unsigned_p
		     ? m_code_for_uint
		     : m_code_for_sint);

    /* Choose between operating on integer scalars or integer vectors.  */
    machine_mode mode = e.vector_mode (0);
    if (e.mode_suffix_id == MODE_n)
      mode = GET_MODE_INNER (mode);
    return e.use_exact_insn (code_for_aarch64_sve_pat (code, mode));
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;

  /* The integer mode associated with the [bhwd] suffix.  */
  scalar_int_mode m_elem_mode;
};

/* Implements svqdec[bhwd]{,_pat}.  */
class svqdec_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqdec_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_MINUS, US_MINUS, elem_mode) {}
};

/* Implements svqinc[bhwd]{,_pat}.  */
class svqinc_bhwd_impl : public svqdec_svqinc_bhwd_impl
{
public:
  CONSTEXPR svqinc_bhwd_impl (scalar_int_mode elem_mode)
    : svqdec_svqinc_bhwd_impl (SS_PLUS, US_PLUS, elem_mode) {}
};
/* Implements svqdecp and svqincp.  */
class svqdecp_svqincp_impl : public function_base
{
public:
  CONSTEXPR svqdecp_svqincp_impl (rtx_code code_for_sint,
				  rtx_code code_for_uint)
    : m_code_for_sint (code_for_sint),
      m_code_for_uint (code_for_uint)
  {}

  rtx
  expand (function_expander &e) const override
  {
    rtx_code code = (e.type_suffix (0).unsigned_p
		     ? m_code_for_uint
		     : m_code_for_sint);
    insn_code icode;
    if (e.mode_suffix_id == MODE_n)
      {
	/* Increment or decrement a scalar (whose mode is given by the first
	   type suffix) by the number of active elements in a predicate
	   (whose mode is given by the second type suffix).  */
	machine_mode mode = GET_MODE_INNER (e.vector_mode (0));
	icode = code_for_aarch64_sve_cntp (code, mode, e.vector_mode (1));
      }
    else
      /* Increment a vector by the number of active elements in a predicate,
	 with the vector mode determining the predicate mode.  */
      icode = code_for_aarch64_sve_cntp (code, e.vector_mode (0));
    return e.use_exact_insn (icode);
  }

  /* The saturating addition or subtraction codes to use for signed and
     unsigned values respectively.  */
  rtx_code m_code_for_sint;
  rtx_code m_code_for_uint;
};

class svrdffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_READ_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    /* See the block comment in aarch64-sve.md for details about the
       handling of the FFR.  */
    emit_insn (gen_aarch64_copy_ffr_to_ffrt ());
    rtx result = e.use_exact_insn (e.pred == PRED_z
				   ? CODE_FOR_aarch64_rdffr_z
				   : CODE_FOR_aarch64_rdffr);
    emit_insn (gen_aarch64_update_ffrt ());
    return result;
  }
};
class svreinterpret_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.vectors_per_tuple () > 1)
      return NULL;

    /* Punt to rtl if the effect of the reinterpret on registers does not
       conform to GCC's endianness model.  */
    if (GET_MODE_CLASS (f.vector_mode (0)) != MODE_VECTOR_BOOL
	&& !targetm.can_change_mode_class (f.vector_mode (0),
					   f.vector_mode (1), FP_REGS))
      return NULL;

    /* Otherwise svreinterpret corresponds directly to a VIEW_CONVERT_EXPR
       reinterpretation.  */
    tree rhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (f.lhs),
		       gimple_call_arg (f.call, 0));
    return gimple_build_assign (f.lhs, VIEW_CONVERT_EXPR, rhs);
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = e.tuple_mode (0);
    /* Handle svbool_t <-> svcount_t.  */
    if (mode == e.tuple_mode (1))
      return e.args[0];
    return e.use_exact_insn (code_for_aarch64_sve_reinterpret (mode));
  }
};
class svrev_impl : public permute
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    /* Punt for now on _b16 and wider; we'd need more complex evpc logic
       to rerecognize the result.  */
    if (f.type_suffix (0).bool_p && f.type_suffix (0).element_bits > 8)
      return NULL;

    /* Permute as { nelts - 1, nelts - 2, nelts - 3, ... }.  */
    poly_int64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (int i = 0; i < 3; ++i)
      builder.quick_push (nelts - i - 1);
    return fold_permute (f, builder);
  }

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (code_for_aarch64_sve_rev (e.vector_mode (0)));
  }
};
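/* For illustration (example added for exposition): the reversal series
   above is encoded as a single pattern of three elements so that it scales
   with the runtime vector length; with eight elements it selects
   { 7, 6, 5, 4, 3, 2, 1, 0 }.  */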
class svrint_impl : public function_base
{
public:
  CONSTEXPR svrint_impl (optab_tag optab, int cond_unspec)
    : m_optab (optab), m_cond_unspec (cond_unspec)
  {}

  rtx
  expand (function_expander &e) const override
  {
    if (e.pred == PRED_none)
      {
	auto icode = direct_optab_handler (m_optab, e.tuple_mode (0));
	return e.use_exact_insn (icode);
      }
    return e.map_to_unspecs (-1, -1, m_cond_unspec);
  }

  optab_tag m_optab;
  int m_cond_unspec;
};

class svsel_impl : public quiet<function_base>
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.vectors_per_tuple () > 1)
      return NULL;

    /* svsel corresponds exactly to VEC_COND_EXPR.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, f.vector_type (0), 0);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);
    return gimple_build_assign (f.lhs, VEC_COND_EXPR, pred,
				gimple_call_arg (f.call, 1),
				gimple_call_arg (f.call, 2));
  }

  rtx
  expand (function_expander &e) const override
  {
    /* svsel (cond, truev, falsev) is vcond_mask (truev, falsev, cond).  */
    e.rotate_inputs_left (0, 3);
    insn_code icode = (e.vectors_per_tuple () > 1
		       ? code_for_aarch64_sve_sel (e.tuple_mode (0))
		       : convert_optab_handler (vcond_mask_optab,
						e.vector_mode (0),
						e.gp_mode (0)));
    return e.use_exact_insn (icode);
  }
};
/* Implements svset2, svset3 and svset4.  */
class svset_impl : public quiet<multi_vector_function>
{
public:
  using quiet<multi_vector_function>::quiet;

  gimple *
  fold (gimple_folder &f) const override
  {
    tree rhs_tuple = gimple_call_arg (f.call, 0);
    tree index = gimple_call_arg (f.call, 1);
    tree rhs_vector = gimple_call_arg (f.call, 2);

    /* Replace the call with two statements: a copy of the full tuple
       to the call result, followed by an update of the individual vector.

       The fold routines expect the replacement statement to have the
       same lhs as the original call, so return the copy statement
       rather than the field update.  */
    gassign *copy = gimple_build_assign (unshare_expr (f.lhs), rhs_tuple);

    /* Get a reference to the individual vector.  */
    tree field = tuple_type_field (TREE_TYPE (f.lhs));
    tree lhs_array = build3 (COMPONENT_REF, TREE_TYPE (field),
			     f.lhs, field, NULL_TREE);
    tree lhs_vector = build4 (ARRAY_REF, TREE_TYPE (rhs_vector),
			      lhs_array, index, NULL_TREE, NULL_TREE);
    gassign *update = gimple_build_assign (lhs_vector, rhs_vector);
    gsi_insert_after (f.gsi, update, GSI_SAME_STMT);

    return copy;
  }

  rtx
  expand (function_expander &e) const override
  {
    rtx rhs_tuple = e.args[0];
    unsigned int index = INTVAL (e.args[1]);
    rtx rhs_vector = e.args[2];

    /* First copy the full tuple to the target register.  */
    rtx lhs_tuple = e.get_nonoverlapping_reg_target ();
    emit_move_insn (lhs_tuple, rhs_tuple);

    /* ...then update the individual vector.  */
    rtx lhs_vector = simplify_gen_subreg (GET_MODE (rhs_vector),
					  lhs_tuple, GET_MODE (lhs_tuple),
					  index * BYTES_PER_SVE_VECTOR);
    emit_move_insn (lhs_vector, rhs_vector);
    return lhs_tuple;
  }
};
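/* For illustration (sketch added for exposition): for svset2 the fold
   above roughly produces

     lhs = rhs_tuple;
     lhs.<vectors>[index] = rhs_vector;

   and returns the first statement so that it keeps the call's lhs; the
   field access is really the COMPONENT_REF/ARRAY_REF pair built above.  */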
class svsetffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.args.quick_push (CONSTM1_RTX (VNx16BImode));
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

class svsplice_impl : public QUIET_CODE_FOR_MODE0 (aarch64_sve_splice)
{
public:
  gimple *
  fold (gimple_folder &f) const override
  {
    if (is_pfalse (gimple_call_arg (f.call, 0)))
      return f.fold_call_to (gimple_call_arg (f.call, 2));
    return NULL;
  }
};

class svst1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.vectors_per_tuple () != 1)
      return NULL;

    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    tree rhs = gimple_call_arg (f.call, gimple_call_num_args (f.call) - 1);
    return gimple_build_call_internal (IFN_MASK_STORE, 4,
				       base, cookie, pred, rhs);
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode;
    if (e.vectors_per_tuple () == 1)
      icode = convert_optab_handler (maskstore_optab,
				     e.vector_mode (0), e.gp_mode (0));
    else
      icode = code_for_aarch64 (UNSPEC_ST1_COUNT, e.tuple_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};
class svst1_scatter_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, as required by mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    machine_mode mem_mode = e.memory_vector_mode ();
    machine_mode int_mode = aarch64_sve_int_mode (mem_mode);
    insn_code icode = convert_optab_handler (mask_scatter_store_optab,
					     mem_mode, int_mode);
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating forms of svst1_scatter.  */
class svst1_scatter_truncate_impl : public truncating_store
{
public:
  using truncating_store::truncating_store;

  rtx
  expand (function_expander &e) const override
  {
    e.prepare_gather_address_operands (1);
    /* Put the predicate last, since the truncating scatters use the same
       operand order as mask_scatter_store_optab.  */
    e.rotate_inputs_left (0, 6);
    insn_code icode = code_for_aarch64_scatter_store_trunc
      (e.memory_vector_mode (), e.vector_mode (0));
    return e.use_exact_insn (icode);
  }
};

/* Implements truncating contiguous forms of svst1.  */
class svst1_truncate_impl : public truncating_store
{
public:
  using truncating_store::truncating_store;

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = code_for_aarch64_store_trunc (e.memory_vector_mode (),
						    e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};
/* Implements svst2, svst3 and svst4.  */
class svst234_impl : public full_width_access
{
public:
  using full_width_access::full_width_access;

  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    tree vectype = f.vector_type (0);

    /* Get the predicate and base pointer.  */
    gimple_seq stmts = NULL;
    tree pred = f.convert_pred (stmts, vectype, 0);
    tree base = f.fold_contiguous_base (stmts, vectype);
    gsi_insert_seq_before (f.gsi, stmts, GSI_SAME_STMT);

    /* View the stored data as an array of vectors.  */
    unsigned int num_args = gimple_call_num_args (f.call);
    tree rhs_tuple = gimple_call_arg (f.call, num_args - 1);
    tree field = tuple_type_field (TREE_TYPE (rhs_tuple));
    tree rhs_array = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (field), rhs_tuple);

    tree cookie = f.load_store_cookie (TREE_TYPE (vectype));
    return gimple_build_call_internal (IFN_MASK_STORE_LANES, 4,
				       base, cookie, pred, rhs_array);
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode tuple_mode = GET_MODE (e.args.last ());
    insn_code icode = convert_optab_handler (vec_mask_store_lanes_optab,
					     tuple_mode, e.vector_mode (0));
    return e.use_contiguous_store_insn (icode);
  }
};

class svstnt1_impl : public full_width_access
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_MEMORY;
  }

  rtx
  expand (function_expander &e) const override
  {
    insn_code icode = (e.vectors_per_tuple () == 1
		       ? code_for_aarch64_stnt1 (e.vector_mode (0))
		       : code_for_aarch64 (UNSPEC_STNT1_COUNT,
					   e.tuple_mode (0)));
    return e.use_contiguous_store_insn (icode);
  }
};
class svsub_impl : public rtx_code_function
{
public:
  CONSTEXPR svsub_impl ()
    : rtx_code_function (MINUS, MINUS, UNSPEC_COND_FSUB) {}

  rtx
  expand (function_expander &e) const override
  {
    /* Canonicalize subtractions of constants to additions.  */
    machine_mode mode = e.vector_mode (0);
    if (e.try_negating_argument (2, mode))
      return e.map_to_rtx_codes (PLUS, PLUS, UNSPEC_COND_FADD, -1);

    return rtx_code_function::expand (e);
  }
};
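/* For illustration (note added for exposition): the canonicalization above
   means that e.g. svsub_x (pg, x, 1) is expanded as an addition of -1, so
   that it can use the same immediate-add patterns as svadd.  */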
/* Implements svtrn1 and svtrn2.  */
class svtrn_impl : public binary_permute
{
public:
  CONSTEXPR svtrn_impl (int base)
    : binary_permute (base ? UNSPEC_TRN2 : UNSPEC_TRN1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svtrn1: { 0, nelts, 2, nelts + 2, 4, nelts + 4, ... }
       svtrn2: as for svtrn1, but with 1 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
	builder.quick_push (m_base + i * 2);
	builder.quick_push (m_base + i * 2 + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svtrn1, 1 for svtrn2.  */
  unsigned int m_base;
};
/* Base class for svundef{,2,3,4}.  */
class svundef_impl : public quiet<multi_vector_function>
{
public:
  using quiet<multi_vector_function>::quiet;

  rtx
  expand (function_expander &e) const override
  {
    rtx target = e.get_reg_target ();
    emit_clobber (copy_rtx (target));
    return target;
  }
};

/* Implements svunpklo and svunpkhi.  */
class svunpk_impl : public quiet<function_base>
{
public:
  CONSTEXPR svunpk_impl (bool high_p) : m_high_p (high_p) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* Don't fold the predicate ops, since every bit of the svbool_t
       result is significant.  */
    if (f.type_suffix_ids[0] == TYPE_SUFFIX_b)
      return NULL;

    /* The first half in memory is VEC_UNPACK_LO_EXPR for little-endian
       and VEC_UNPACK_HI_EXPR for big-endian.  */
    bool high_p = BYTES_BIG_ENDIAN ? !m_high_p : m_high_p;
    tree_code code = high_p ? VEC_UNPACK_HI_EXPR : VEC_UNPACK_LO_EXPR;
    return gimple_build_assign (f.lhs, code, gimple_call_arg (f.call, 0));
  }

  rtx
  expand (function_expander &e) const override
  {
    machine_mode mode = GET_MODE (e.args[0]);
    unsigned int unpacku = m_high_p ? UNSPEC_UNPACKUHI : UNSPEC_UNPACKULO;
    unsigned int unpacks = m_high_p ? UNSPEC_UNPACKSHI : UNSPEC_UNPACKSLO;
    insn_code icode;
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
      icode = code_for_aarch64_sve_punpk (unpacku, mode);
    else
      {
	int unspec = e.type_suffix (0).unsigned_p ? unpacku : unpacks;
	icode = code_for_aarch64_sve_unpk (unspec, unspec, mode);
      }
    return e.use_exact_insn (icode);
  }

  /* True for svunpkhi, false for svunpklo.  */
  bool m_high_p;
};
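/* For illustration (note added for exposition): on little-endian targets
   svunpklo therefore folds to VEC_UNPACK_LO_EXPR and svunpkhi to
   VEC_UNPACK_HI_EXPR; on big-endian targets the two tree codes are
   swapped, as the comment above explains.  */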
/* Also implements svsudot.  */
class svusdot_impl : public function_base
{
public:
  CONSTEXPR svusdot_impl (bool su) : m_su (su) {}

  rtx
  expand (function_expander &e) const override
  {
    /* The implementation of the ACLE function svsudot (for the non-lane
       version) is through the USDOT instruction but with the second and third
       inputs swapped.  */
    if (m_su)
      e.rotate_inputs_left (1, 3);
    /* The ACLE function has the same order requirements as for svdot.
       While there's no requirement for the RTL pattern to have the same sort
       of order as that for <sur>dot_prod, it's easier to read.
       Hence we do the same rotation on arguments as svdot_impl does.  */
    e.rotate_inputs_left (0, 3);
    machine_mode mode = e.vector_mode (0);
    insn_code icode = code_for_dot_prod (UNSPEC_USDOT, e.result_mode (), mode);
    return e.use_exact_insn (icode);
  }

  /* True for svsudot, false for svusdot.  */
  bool m_su;
};

/* Implements svuzp1 and svuzp2.  */
class svuzp_impl : public binary_permute
{
public:
  CONSTEXPR svuzp_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_UZP2 : UNSPEC_UZP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svuzp1: { 0, 2, 4, 6, ... }
       svuzp2: { 1, 3, 5, 7, ... }.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    vec_perm_builder builder (nelts, 1, 3);
    for (unsigned int i = 0; i < 3; ++i)
      builder.quick_push (m_base + i * 2);
    return fold_permute (f, builder);
  }

  /* 0 for svuzp1, 1 for svuzp2.  */
  unsigned int m_base;
};
/* A function_base for svwhilele and svwhilelt functions.  */
class svwhilelx_impl : public while_comparison
{
public:
  CONSTEXPR svwhilelx_impl (int unspec_for_sint, int unspec_for_uint,
			    bool eq_p)
    : while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
  {}

  /* Try to fold a call by treating its arguments as constants of type T.
     We have already filtered out the degenerate cases of X .LT. MIN
     and X .LE. MAX.  */
  template<typename T>
  gimple *
  fold_type (gimple_folder &f) const
  {
    /* Only handle cases in which both operands are constant.  */
    T arg0, arg1;
    if (!poly_int_tree_p (gimple_call_arg (f.call, 0), &arg0)
	|| !poly_int_tree_p (gimple_call_arg (f.call, 1), &arg1))
      return NULL;

    /* Check whether the result is known to be all-false.  */
    if (m_eq_p ? known_gt (arg0, arg1) : known_ge (arg0, arg1))
      return f.fold_to_pfalse ();

    /* Punt if we can't tell at compile time whether the result
       is all-false.  */
    if (m_eq_p ? maybe_gt (arg0, arg1) : maybe_ge (arg0, arg1))
      return NULL;

    /* At this point we know the result has at least one set element.  */
    poly_uint64 diff = arg1 - arg0;
    poly_uint64 nelts = GET_MODE_NUNITS (f.vector_mode (0));

    /* Canonicalize the svwhilele form to the svwhilelt form.  Subtract
       from NELTS rather than adding to DIFF, to prevent overflow.  */
    if (m_eq_p)
      nelts -= 1;

    /* Check whether the result is known to be all-true.  */
    if (known_ge (diff, nelts))
      return f.fold_to_ptrue ();

    /* Punt if DIFF might not be the actual number of set elements
       in the result.  Conditional equality is fine.  */
    if (maybe_gt (diff, nelts))
      return NULL;

    /* At this point we know that the predicate will have DIFF set elements
       for svwhilelt and DIFF + 1 set elements for svwhilele (which stops
       after rather than before ARG1 is reached).  See if we can create
       the predicate at compile time.  */
    unsigned HOST_WIDE_INT vl;
    if (diff.is_constant (&vl))
      /* Overflow is no longer possible after the checks above.  */
      return f.fold_to_vl_pred (m_eq_p ? vl + 1 : vl);

    return NULL;
  }

  gimple *
  fold (gimple_folder &f) const override
  {
    if (f.vectors_per_tuple () > 1)
      return NULL;

    /* Filter out cases where the condition is always true or always false.  */
    tree arg1 = gimple_call_arg (f.call, 1);
    if (!m_eq_p && operand_equal_p (arg1, TYPE_MIN_VALUE (TREE_TYPE (arg1))))
      return f.fold_to_pfalse ();
    if (m_eq_p && operand_equal_p (arg1, TYPE_MAX_VALUE (TREE_TYPE (arg1))))
      return f.fold_to_ptrue ();

    if (f.type_suffix (1).unsigned_p)
      return fold_type<poly_uint64> (f);
    else
      return fold_type<poly_int64> (f);
  }

  /* True for svwhilele, false for svwhilelt.  */
  bool m_eq_p;
};
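/* For illustration, assuming 32-bit elements (examples added for
   exposition): svwhilelt_b32 (0, 0) folds to an all-false predicate,
   svwhilelt_b32 (0, 4) to a predicate with the first four lanes set
   (four .S lanes always exist), and svwhilele_b32 (x, INT32_MAX) folds
   to an all-true predicate.  */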
class svwrffr_impl : public function_base
{
public:
  unsigned int
  call_properties (const function_instance &) const override
  {
    return CP_WRITE_FFR;
  }

  rtx
  expand (function_expander &e) const override
  {
    return e.use_exact_insn (CODE_FOR_aarch64_wrffr);
  }
};

/* Implements svzip1 and svzip2.  */
class svzip_impl : public binary_permute
{
public:
  CONSTEXPR svzip_impl (unsigned int base)
    : binary_permute (base ? UNSPEC_ZIP2 : UNSPEC_ZIP1), m_base (base) {}

  gimple *
  fold (gimple_folder &f) const override
  {
    /* svzip1: { 0, nelts, 1, nelts + 1, 2, nelts + 2, ... }
       svzip2: as for svzip1, but with nelts / 2 added to each index.  */
    poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (f.lhs));
    poly_uint64 base = m_base * exact_div (nelts, 2);
    vec_perm_builder builder (nelts, 2, 3);
    for (unsigned int i = 0; i < 3; ++i)
      {
	builder.quick_push (base + i);
	builder.quick_push (base + i + nelts);
      }
    return fold_permute (f, builder);
  }

  /* 0 for svzip1, 1 for svzip2.  */
  unsigned int m_base;
};
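/* For illustration (example added for exposition): with eight elements,
   the series above selects { 0, 8, 1, 9, 2, 10, 3, 11 } for svzip1 and
   { 4, 12, 5, 13, 6, 14, 7, 15 } for svzip2.  */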
} /* end anonymous namespace */

namespace aarch64_sve {
FUNCTION (svabd, svabd_impl,)
FUNCTION (svabs, quiet<rtx_code_function>, (ABS, ABS, UNSPEC_COND_FABS))
FUNCTION (svacge, svac_impl, (UNSPEC_COND_FCMGE))
FUNCTION (svacgt, svac_impl, (UNSPEC_COND_FCMGT))
FUNCTION (svacle, svac_impl, (UNSPEC_COND_FCMLE))
FUNCTION (svaclt, svac_impl, (UNSPEC_COND_FCMLT))
FUNCTION (svadd, rtx_code_function, (PLUS, PLUS, UNSPEC_COND_FADD))
FUNCTION (svadda, svadda_impl,)
FUNCTION (svaddv, svaddv_impl,)
FUNCTION (svadrb, svadr_bhwd_impl, (0))
FUNCTION (svadrd, svadr_bhwd_impl, (3))
FUNCTION (svadrh, svadr_bhwd_impl, (1))
FUNCTION (svadrw, svadr_bhwd_impl, (2))
FUNCTION (svand, rtx_code_function, (AND, AND))
FUNCTION (svandv, svandv_impl,)
FUNCTION (svasr, rtx_code_function, (ASHIFTRT, ASHIFTRT))
FUNCTION (svasr_wide, shift_wide, (ASHIFTRT, UNSPEC_ASHIFTRT_WIDE))
FUNCTION (svasrd, unspec_based_function, (UNSPEC_ASRD, -1, -1))
FUNCTION (svbfdot, fixed_insn_function, (CODE_FOR_aarch64_sve_bfdotvnx4sf))
FUNCTION (svbfdot_lane, fixed_insn_function,
	  (CODE_FOR_aarch64_sve_bfdot_lanevnx4sf))
FUNCTION (svbfmlalb, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlalbvnx4sf))
FUNCTION (svbfmlalb_lane, fixed_insn_function,
	  (CODE_FOR_aarch64_sve_bfmlalb_lanevnx4sf))
FUNCTION (svbfmlalt, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmlaltvnx4sf))
FUNCTION (svbfmlalt_lane, fixed_insn_function,
	  (CODE_FOR_aarch64_sve_bfmlalt_lanevnx4sf))
FUNCTION (svbfmmla, fixed_insn_function, (CODE_FOR_aarch64_sve_bfmmlavnx4sf))
FUNCTION (svbic, svbic_impl,)
FUNCTION (svbrka, svbrk_unary_impl, (UNSPEC_BRKA))
FUNCTION (svbrkb, svbrk_unary_impl, (UNSPEC_BRKB))
FUNCTION (svbrkn, svbrk_binary_impl, (UNSPEC_BRKN))
FUNCTION (svbrkpa, svbrk_binary_impl, (UNSPEC_BRKPA))
FUNCTION (svbrkpb, svbrk_binary_impl, (UNSPEC_BRKPB))
FUNCTION (svcadd, svcadd_impl,)
FUNCTION (svclasta, svclast_impl, (UNSPEC_CLASTA))
FUNCTION (svclastb, svclast_impl, (UNSPEC_CLASTB))
FUNCTION (svcls, unary_count, (CLRSB))
FUNCTION (svclz, unary_count, (CLZ))
FUNCTION (svcmla, svcmla_impl,)
FUNCTION (svcmla_lane, svcmla_lane_impl,)
FUNCTION (svcmpeq, svcmp_impl, (EQ_EXPR, UNSPEC_COND_FCMEQ))
FUNCTION (svcmpeq_wide, svcmp_wide_impl, (EQ_EXPR, UNSPEC_COND_CMPEQ_WIDE,
					  UNSPEC_COND_CMPEQ_WIDE))
FUNCTION (svcmpge, svcmp_impl, (GE_EXPR, UNSPEC_COND_FCMGE))
FUNCTION (svcmpge_wide, svcmp_wide_impl, (GE_EXPR, UNSPEC_COND_CMPGE_WIDE,
					  UNSPEC_COND_CMPHS_WIDE))
FUNCTION (svcmpgt, svcmp_impl, (GT_EXPR, UNSPEC_COND_FCMGT))
FUNCTION (svcmpgt_wide, svcmp_wide_impl, (GT_EXPR, UNSPEC_COND_CMPGT_WIDE,
					  UNSPEC_COND_CMPHI_WIDE))
FUNCTION (svcmple, svcmp_impl, (LE_EXPR, UNSPEC_COND_FCMLE))
FUNCTION (svcmple_wide, svcmp_wide_impl, (LE_EXPR, UNSPEC_COND_CMPLE_WIDE,
					  UNSPEC_COND_CMPLS_WIDE))
FUNCTION (svcmplt, svcmp_impl, (LT_EXPR, UNSPEC_COND_FCMLT))
FUNCTION (svcmplt_wide, svcmp_wide_impl, (LT_EXPR, UNSPEC_COND_CMPLT_WIDE,
					  UNSPEC_COND_CMPLO_WIDE))
FUNCTION (svcmpne, svcmp_impl, (NE_EXPR, UNSPEC_COND_FCMNE))
FUNCTION (svcmpne_wide, svcmp_wide_impl, (NE_EXPR, UNSPEC_COND_CMPNE_WIDE,
					  UNSPEC_COND_CMPNE_WIDE))
FUNCTION (svcmpuo, svcmpuo_impl,)
FUNCTION (svcnot, svcnot_impl,)
FUNCTION (svcnt, unary_count, (POPCOUNT))
FUNCTION (svcntb, svcnt_bhwd_impl, (VNx16QImode))
FUNCTION (svcntb_pat, svcnt_bhwd_pat_impl, (VNx16QImode))
FUNCTION (svcntd, svcnt_bhwd_impl, (VNx2DImode))
FUNCTION (svcntd_pat, svcnt_bhwd_pat_impl, (VNx2DImode))
FUNCTION (svcnth, svcnt_bhwd_impl, (VNx8HImode))
FUNCTION (svcnth_pat, svcnt_bhwd_pat_impl, (VNx8HImode))
FUNCTION (svcntp, svcntp_impl,)
FUNCTION (svcntw, svcnt_bhwd_impl, (VNx4SImode))
FUNCTION (svcntw_pat, svcnt_bhwd_pat_impl, (VNx4SImode))
FUNCTION (svcompact, svcompact_impl,)
FUNCTION (svcreate2, svcreate_impl, (2))
FUNCTION (svcreate3, svcreate_impl, (3))
FUNCTION (svcreate4, svcreate_impl, (4))
FUNCTION (svcvt, svcvt_impl,)
FUNCTION (svcvtnt, svcvtnt_impl,)
FUNCTION (svdiv, svdiv_impl,)
FUNCTION (svdivr, rtx_code_function_rotated, (DIV, UDIV, UNSPEC_COND_FDIV))
FUNCTION (svdot, svdot_impl,)
FUNCTION (svdot_lane, svdotprod_lane_impl, (UNSPEC_SDOT, UNSPEC_UDOT,
					    UNSPEC_FDOT, UNSPEC_DOT_LANE_FP8))
FUNCTION (svdup, svdup_impl,)
FUNCTION (svdup_lane, svdup_lane_impl,)
FUNCTION (svdupq, svdupq_impl,)
FUNCTION (svdupq_lane, svdupq_lane_impl,)
FUNCTION (sveor, rtx_code_function, (XOR, XOR, -1))
FUNCTION (sveorv, sveorv_impl,)
FUNCTION (svexpa, unspec_based_function, (-1, -1, UNSPEC_FEXPA))
FUNCTION (svext, QUIET_CODE_FOR_MODE0 (aarch64_sve_ext),)
FUNCTION (svextb, svext_bhw_impl, (QImode))
FUNCTION (svexth, svext_bhw_impl, (HImode))
FUNCTION (svextw, svext_bhw_impl, (SImode))
FUNCTION (svget2, svget_impl, (2))
FUNCTION (svget3, svget_impl, (3))
FUNCTION (svget4, svget_impl, (4))
FUNCTION (svindex, svindex_impl,)
FUNCTION (svinsr, svinsr_impl,)
FUNCTION (svlasta, svlast_impl, (UNSPEC_LASTA))
FUNCTION (svlastb, svlast_impl, (UNSPEC_LASTB))
FUNCTION (svld1, svld1_impl,)
FUNCTION (svld1_gather, svld1_gather_impl,)
FUNCTION (svld1ro, svld1ro_impl,)
FUNCTION (svld1rq, svld1rq_impl,)
FUNCTION (svld1sb, svld1_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sb_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s8))
FUNCTION (svld1sh, svld1_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s16))
FUNCTION (svld1sw, svld1_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1sw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_s32))
FUNCTION (svld1ub, svld1_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1ub_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u8))
FUNCTION (svld1uh, svld1_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uh_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u16))
FUNCTION (svld1uw, svld1_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld1uw_gather, svld1_gather_extend_impl, (TYPE_SUFFIX_u32))
FUNCTION (svld2, svld234_impl, (2))
FUNCTION (svld3, svld234_impl, (3))
FUNCTION (svld4, svld234_impl, (4))
FUNCTION (svldff1, svldxf1_impl, (UNSPEC_LDFF1))
FUNCTION (svldff1_gather, svldff1_gather_impl,)
FUNCTION (svldff1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDFF1))
FUNCTION (svldff1sb_gather, svldff1_gather_extend, (TYPE_SUFFIX_s8))
FUNCTION (svldff1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDFF1))
FUNCTION (svldff1sh_gather, svldff1_gather_extend, (TYPE_SUFFIX_s16))
FUNCTION (svldff1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDFF1))
FUNCTION (svldff1sw_gather, svldff1_gather_extend, (TYPE_SUFFIX_s32))
FUNCTION (svldff1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDFF1))
FUNCTION (svldff1ub_gather, svldff1_gather_extend, (TYPE_SUFFIX_u8))
FUNCTION (svldff1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDFF1))
FUNCTION (svldff1uh_gather, svldff1_gather_extend, (TYPE_SUFFIX_u16))
FUNCTION (svldff1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDFF1))
FUNCTION (svldff1uw_gather, svldff1_gather_extend, (TYPE_SUFFIX_u32))
FUNCTION (svldnf1, svldxf1_impl, (UNSPEC_LDNF1))
FUNCTION (svldnf1sb, svldxf1_extend_impl, (TYPE_SUFFIX_s8, UNSPEC_LDNF1))
FUNCTION (svldnf1sh, svldxf1_extend_impl, (TYPE_SUFFIX_s16, UNSPEC_LDNF1))
FUNCTION (svldnf1sw, svldxf1_extend_impl, (TYPE_SUFFIX_s32, UNSPEC_LDNF1))
FUNCTION (svldnf1ub, svldxf1_extend_impl, (TYPE_SUFFIX_u8, UNSPEC_LDNF1))
FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
FUNCTION (svldnt1, svldnt1_impl,)
FUNCTION (svlen, svlen_impl,)
FUNCTION (svlsl, svlsl_impl,)
FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
FUNCTION (svmad, svmad_impl,)
FUNCTION (svmax, rtx_code_function, (SMAX, UMAX, UNSPEC_COND_FMAX,
				     UNSPEC_FMAX))
FUNCTION (svmaxnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMAXNM,
						    UNSPEC_FMAXNM))
FUNCTION (svmaxnmv, svmaxnmv_impl,)
FUNCTION (svmaxv, svmaxv_impl,)
FUNCTION (svmin, rtx_code_function, (SMIN, UMIN, UNSPEC_COND_FMIN,
				     UNSPEC_FMIN))
FUNCTION (svminnm, cond_or_uncond_unspec_function, (UNSPEC_COND_FMINNM,
						    UNSPEC_FMINNM))
FUNCTION (svminnmv, svminnmv_impl,)
FUNCTION (svminv, svminv_impl,)
FUNCTION (svmla, svmla_impl,)
FUNCTION (svmla_lane, svmla_lane_impl,)
FUNCTION (svmls, svmls_impl,)
FUNCTION (svmls_lane, svmls_lane_impl,)
FUNCTION (svmmla, svmmla_impl,)
FUNCTION (svmov, svmov_impl,)
FUNCTION (svmsb, svmsb_impl,)
FUNCTION (svmul, svmul_impl,)
FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
					  UNSPEC_UMUL_HIGHPART, -1))
FUNCTION (svmulx, unspec_based_function, (-1, -1, UNSPEC_COND_FMULX))
FUNCTION (svnand, svnand_impl,)
FUNCTION (svneg, quiet<rtx_code_function>, (NEG, NEG, UNSPEC_COND_FNEG))
FUNCTION (svnmad, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmla, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLA))
FUNCTION (svnmls, unspec_based_function_rotated, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnmsb, unspec_based_function, (-1, -1, UNSPEC_COND_FNMLS))
FUNCTION (svnor, svnor_impl,)
FUNCTION (svnot, svnot_impl,)
FUNCTION (svorn, svorn_impl,)
FUNCTION (svorr, rtx_code_function, (IOR, IOR))
FUNCTION (svorv, svorv_impl,)
FUNCTION (svpfalse, svpfalse_impl,)
FUNCTION (svpfirst, svpfirst_svpnext_impl, (UNSPEC_PFIRST))
FUNCTION (svpnext, svpfirst_svpnext_impl, (UNSPEC_PNEXT))
FUNCTION (svprfb, svprf_bhwd_impl, (VNx16QImode))
FUNCTION (svprfb_gather, svprf_bhwd_gather_impl, (VNx16QImode))
FUNCTION (svprfd, svprf_bhwd_impl, (VNx2DImode))
FUNCTION (svprfd_gather, svprf_bhwd_gather_impl, (VNx2DImode))
FUNCTION (svprfh, svprf_bhwd_impl, (VNx8HImode))
FUNCTION (svprfh_gather, svprf_bhwd_gather_impl, (VNx8HImode))
FUNCTION (svprfw, svprf_bhwd_impl, (VNx4SImode))
FUNCTION (svprfw_gather, svprf_bhwd_gather_impl, (VNx4SImode))
FUNCTION (svptest_any, svptest_impl, (NE))
FUNCTION (svptest_first, svptest_impl, (LT))
FUNCTION (svptest_last, svptest_impl, (LTU))
FUNCTION (svptrue, svptrue_impl,)
FUNCTION (svptrue_pat, svptrue_pat_impl,)
FUNCTION (svqadd, rtx_code_function, (SS_PLUS, US_PLUS, -1))
FUNCTION (svqdecb, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecb_pat, svqdec_bhwd_impl, (QImode))
FUNCTION (svqdecd, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdecd_pat, svqdec_bhwd_impl, (DImode))
FUNCTION (svqdech, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdech_pat, svqdec_bhwd_impl, (HImode))
FUNCTION (svqdecp, svqdecp_svqincp_impl, (SS_MINUS, US_MINUS))
FUNCTION (svqdecw, svqdec_bhwd_impl, (SImode))
FUNCTION (svqdecw_pat, svqdec_bhwd_impl, (SImode))
FUNCTION (svqincb, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincb_pat, svqinc_bhwd_impl, (QImode))
FUNCTION (svqincd, svqinc_bhwd_impl, (DImode))
FUNCTION (svqincd_pat, svqinc_bhwd_impl, (DImode))
FUNCTION (svqinch, svqinc_bhwd_impl, (HImode))
FUNCTION (svqinch_pat, svqinc_bhwd_impl, (HImode))
FUNCTION (svqincp, svqdecp_svqincp_impl, (SS_PLUS, US_PLUS))
FUNCTION (svqincw, svqinc_bhwd_impl, (SImode))
FUNCTION (svqincw_pat, svqinc_bhwd_impl, (SImode))
FUNCTION (svqsub, rtx_code_function, (SS_MINUS, US_MINUS, -1))
FUNCTION (svrbit, rtx_code_function, (BITREVERSE, BITREVERSE, -1))
FUNCTION (svrdffr, svrdffr_impl,)
FUNCTION (svrecpe, unspec_based_function, (-1, UNSPEC_URECPE, UNSPEC_FRECPE))
FUNCTION (svrecps, unspec_based_function, (-1, -1, UNSPEC_FRECPS))
FUNCTION (svrecpx, unspec_based_function, (-1, -1, UNSPEC_COND_FRECPX))
FUNCTION (svreinterpret, svreinterpret_impl,)
FUNCTION (svrev, svrev_impl,)
FUNCTION (svrevb, unspec_based_function, (UNSPEC_REVB, UNSPEC_REVB, -1))
FUNCTION (svrevh, unspec_based_function, (UNSPEC_REVH, UNSPEC_REVH, -1))
FUNCTION (svrevw, unspec_based_function, (UNSPEC_REVW, UNSPEC_REVW, -1))
FUNCTION (svrinta, svrint_impl, (round_optab, UNSPEC_COND_FRINTA))
FUNCTION (svrinti, svrint_impl, (nearbyint_optab, UNSPEC_COND_FRINTI))
FUNCTION (svrintm, svrint_impl, (floor_optab, UNSPEC_COND_FRINTM))
FUNCTION (svrintn, svrint_impl, (roundeven_optab, UNSPEC_COND_FRINTN))
FUNCTION (svrintp, svrint_impl, (ceil_optab, UNSPEC_COND_FRINTP))
FUNCTION (svrintx, svrint_impl, (rint_optab, UNSPEC_COND_FRINTX))
FUNCTION (svrintz, svrint_impl, (btrunc_optab, UNSPEC_COND_FRINTZ))
FUNCTION (svrsqrte, unspec_based_function, (-1, UNSPEC_RSQRTE, UNSPEC_RSQRTE))
FUNCTION (svrsqrts, unspec_based_function, (-1, -1, UNSPEC_RSQRTS))
FUNCTION (svscale, unspec_based_function, (-1, -1, UNSPEC_COND_FSCALE))
FUNCTION (svsel, svsel_impl,)
FUNCTION (svset2, svset_impl, (2))
FUNCTION (svset3, svset_impl, (3))
FUNCTION (svset4, svset_impl, (4))
FUNCTION (svsetffr, svsetffr_impl,)
FUNCTION (svsplice, svsplice_impl,)
FUNCTION (svsqrt, rtx_code_function, (SQRT, SQRT, UNSPEC_COND_FSQRT))
FUNCTION (svst1, svst1_impl,)
FUNCTION (svst1_scatter, svst1_scatter_impl,)
FUNCTION (svst1b, svst1_truncate_impl, (QImode))
FUNCTION (svst1b_scatter, svst1_scatter_truncate_impl, (QImode))
FUNCTION (svst1h, svst1_truncate_impl, (HImode))
FUNCTION (svst1h_scatter, svst1_scatter_truncate_impl, (HImode))
FUNCTION (svst1w, svst1_truncate_impl, (SImode))
FUNCTION (svst1w_scatter, svst1_scatter_truncate_impl, (SImode))
FUNCTION (svst2, svst234_impl, (2))
FUNCTION (svst3, svst234_impl, (3))
FUNCTION (svst4, svst234_impl, (4))
FUNCTION (svstnt1, svstnt1_impl,)
FUNCTION (svsub, svsub_impl,)
FUNCTION (svsubr, rtx_code_function_rotated, (MINUS, MINUS, UNSPEC_COND_FSUB))
FUNCTION (svsudot, svusdot_impl, (true))
FUNCTION (svsudot_lane, svdotprod_lane_impl, (UNSPEC_SUDOT, -1, -1))
FUNCTION (svtbl, quiet<unspec_based_uncond_function>, (UNSPEC_TBL, UNSPEC_TBL,
						       UNSPEC_TBL))
FUNCTION (svtmad, CODE_FOR_MODE0 (aarch64_sve_tmad),)
FUNCTION (svtrn1, svtrn_impl, (0))
FUNCTION (svtrn1q, unspec_based_function, (UNSPEC_TRN1Q, UNSPEC_TRN1Q,
					   UNSPEC_TRN1Q))
FUNCTION (svtrn2, svtrn_impl, (1))
FUNCTION (svtrn2q, unspec_based_function, (UNSPEC_TRN2Q, UNSPEC_TRN2Q,
					   UNSPEC_TRN2Q))
FUNCTION (svtsmul, unspec_based_function, (-1, -1, UNSPEC_FTSMUL))
FUNCTION (svtssel, unspec_based_function, (-1, -1, UNSPEC_FTSSEL))
FUNCTION (svundef, svundef_impl, (1))
FUNCTION (svundef2, svundef_impl, (2))
FUNCTION (svundef3, svundef_impl, (3))
FUNCTION (svundef4, svundef_impl, (4))
FUNCTION (svunpkhi, svunpk_impl, (true))
FUNCTION (svunpklo, svunpk_impl, (false))
FUNCTION (svusdot, svusdot_impl, (false))
FUNCTION (svusdot_lane, svdotprod_lane_impl, (UNSPEC_USDOT, -1, -1))
FUNCTION (svusmmla, unspec_based_add_function, (UNSPEC_USMATMUL, -1, -1))
FUNCTION (svuzp1, svuzp_impl, (0))
FUNCTION (svuzp1q, unspec_based_function, (UNSPEC_UZP1Q, UNSPEC_UZP1Q,
					   UNSPEC_UZP1Q))
FUNCTION (svuzp2, svuzp_impl, (1))
FUNCTION (svuzp2q, unspec_based_function, (UNSPEC_UZP2Q, UNSPEC_UZP2Q,
					   UNSPEC_UZP2Q))
FUNCTION (svwhilele, svwhilelx_impl, (UNSPEC_WHILELE, UNSPEC_WHILELS, true))
FUNCTION (svwhilelt, svwhilelx_impl, (UNSPEC_WHILELT, UNSPEC_WHILELO, false))
FUNCTION (svwrffr, svwrffr_impl,)
FUNCTION (svzip1, svzip_impl, (0))
FUNCTION (svzip1q, unspec_based_function, (UNSPEC_ZIP1Q, UNSPEC_ZIP1Q,
					   UNSPEC_ZIP1Q))
FUNCTION (svzip2, svzip_impl, (1))
FUNCTION (svzip2q, unspec_based_function, (UNSPEC_ZIP2Q, UNSPEC_ZIP2Q,
					   UNSPEC_ZIP2Q))
NEON_SVE_BRIDGE_FUNCTION (svget_neonq, svget_neonq_impl,)
NEON_SVE_BRIDGE_FUNCTION (svset_neonq, svset_neonq_impl,)
NEON_SVE_BRIDGE_FUNCTION (svdup_neonq, svdup_neonq_impl,)

} /* end namespace aarch64_sve */