;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The following define_subst rules are used to produce patterns representing
;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
;; a vec_concat with zeroes.  The order of the vec_concat operands differs
;; for big-endian so we have a separate define_subst rule for each endianness.
(define_subst "add_vec_concat_subst_le"
  [(set (match_operand:VDZ 0)
        (match_operand:VDZ 1))]
  "!BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_dup 1)
          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])

(define_subst "add_vec_concat_subst_be"
  [(set (match_operand:VDZ 0)
        (match_operand:VDZ 1))]
  "BYTES_BIG_ENDIAN"
  [(set (match_operand:<VDBL> 0 "register_operand" "=w")
        (vec_concat:<VDBL>
          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
          (match_dup 1)))])

;; The subst_attr definitions used to annotate patterns further in the file.
;; Patterns that need to have the above substitutions added to them should
;; have <vczle><vczbe> added to their name.
(define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
(define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")

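;; As an illustrative sketch: tagging a pattern name with <vczle><vczbe>
;; creates, alongside the plain pattern, "_vec_concatz_le"/"_vec_concatz_be"
;; variants whose RTL has the form (little-endian case):
;;   (set (reg:V16QI q0)
;;        (vec_concat:V16QI (plus:V8QI ...)
;;                          (const_vector:V8QI [0 ...])))
;; so that combine can match the implicit zeroing of the high half that
;; 64-bit Advanced SIMD instructions perform.
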
(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_FLOAT"
{
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);

  /* If a constant is too complex to force to memory (e.g. because it
     contains CONST_POLY_INTs), build it up from individual elements instead.
     We should only need to do this before RA; aarch64_legitimate_constant_p
     should ensure that we don't try to rematerialize the constant later.  */
  if (GET_CODE (operands[1]) == CONST_VECTOR
      && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
    {
      aarch64_expand_vector_init (operands[0], operands[1]);
      DONE;
    }
})

(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
        (match_operand:VALL_F16 1 "general_operand"))]
  "TARGET_FLOAT && !STRICT_ALIGNMENT"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand")))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1  ; attrs: type     ]
     [ w        , w  ; neon_dup<q>     ] dup\t%0.<Vtype>, %1.<Vetype>[0]
     [ w        , ?r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
  }
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand")))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 ; attrs: type     ]
     [ w        , w ; neon_dup<q>     ] dup\t%0.<Vtype>, %1.<Vetype>[0]
     [ w        , r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
  }
)

(define_insn "@aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "@aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
  [(set (match_operand:VDMOV 0 "nonimmediate_operand")
        (match_operand:VDMOV 1 "general_operand"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
  {@ [cons: =0, 1; attrs: type, arch, length]
     [w , m ; neon_load1_1reg<q> , *        , *] ldr\t%d0, %1
     [r , m ; load_8             , *        , *] ldr\t%x0, %1
     [m , Dz; store_8            , *        , *] str\txzr, %0
     [m , w ; neon_store1_1reg<q>, *        , *] str\t%d1, %0
     [m , r ; store_8            , *        , *] str\t%x1, %0
     [w , w ; neon_logic<q>      , simd     , *] mov\t%0.<Vbtype>, %1.<Vbtype>
     [w , w ; neon_logic<q>      , *        , *] fmov\t%d0, %d1
     [?r, w ; neon_to_gp<q>      , base_simd, *] umov\t%0, %1.d[0]
     [?r, w ; neon_to_gp<q>      , *        , *] fmov\t%x0, %d1
     [?w, r ; f_mcr              , *        , *] fmov\t%d0, %1
     [?r, r ; mov_reg            , *        , *] mov\t%0, %1
     [w , Dn; neon_move<q>       , simd     , *] << aarch64_output_simd_mov_imm (operands[1], 64);
     [w , Dz; f_mcr              , *        , *] fmov\t%d0, xzr
     [w , Dx; neon_move          , simd     , 8] #
  }
  "CONST_INT_P (operands[1])
   && aarch64_simd_special_constant_p (operands[1], <MODE>mode)
   && FP_REGNUM_P (REGNO (operands[0]))"
  [(const_int 0)]
  {
    aarch64_maybe_generate_simd_constant (operands[0], operands[1], <MODE>mode);
    DONE;
  }
)

(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
  [(set (match_operand:VQMOV 0 "nonimmediate_operand")
        (match_operand:VQMOV 1 "general_operand"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
  {@ [cons: =0, 1; attrs: type, arch, length]
     [w  , m ; neon_load1_1reg<q> , *   , 4] ldr\t%q0, %1
     [Umn, Dz; store_16           , *   , 4] stp\txzr, xzr, %0
     [m  , w ; neon_store1_1reg<q>, *   , 4] str\t%q1, %0
     [w  , w ; neon_logic<q>      , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
     [w  , w ; *                  , sve , 4] mov\t%Z0.d, %Z1.d
     [?r , w ; multiple           , *   , 8] #
     [?w , r ; multiple           , *   , 8] #
     [?r , r ; multiple           , *   , 8] #
     [w  , Dn; neon_move<q>       , simd, 4] << aarch64_output_simd_mov_imm (operands[1], 128);
     [w  , Dz; fmov               , *   , 4] fmov\t%d0, xzr
     [w  , Dx; neon_move          , simd, 8] #
  }
  "&& reload_completed
   && ((REG_P (operands[0])
        && REG_P (operands[1])
        && !(FP_REGNUM_P (REGNO (operands[0]))
             && FP_REGNUM_P (REGNO (operands[1]))))
       || (aarch64_simd_special_constant_p (operands[1], <MODE>mode)
           && FP_REGNUM_P (REGNO (operands[0]))))"
  [(const_int 0)]
  {
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
    else
      {
        if (FP_REGNUM_P (REGNO (operands[0]))
            && aarch64_maybe_generate_simd_constant (operands[0], operands[1],
                                                     <MODE>mode))
          ;
        else
          aarch64_split_simd_move (operands[0], operands[1]);
      }
    DONE;
  }
)

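;; As an illustrative example, a Q-register to GP-register-pair copy taken
;; from one of the "multiple" alternatives above splits after reload into
;; two lane moves:
;;   umov x0, v0.d[0]
;;   umov x1, v0.d[1]
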
;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "aarch64_simd_stp<mode>"
  [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand")
        (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand")))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 ; attrs: type           ]
     [ Umn      , w ; neon_stp              ] stp\t%<Vetype>1, %<Vetype>1, %y0
     [ Umn      , r ; store_<ldpstp_vel_sz> ] stp\t%<vwcore>1, %<vwcore>1, %y0
  }
)

(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQMOV 0)
        (match_operand:VQMOV 1))]
  "TARGET_FLOAT"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);

        emit_move_insn (dst_low_part, src_low_part);
        emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
                                               src_high_part));
      }
    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
        emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
        emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_expand "aarch64_get_half<mode>"
  [(set (match_operand:<VHALF> 0 "register_operand")
        (vec_select:<VHALF>
          (match_operand:VQMOV 1 "register_operand")
          (match_operand 2 "ascending_int_parallel")))]
  "TARGET_FLOAT"
  {
    if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
      {
        emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
        DONE;
      }
  }
)

(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half")))]
  "TARGET_FLOAT"
  {@ [ cons: =0 , 1 ; attrs: type   , arch      ]
     [ w        , w ; mov_reg       , simd      ] #
     [ ?r       , w ; neon_to_gp<q> , base_simd ] umov\t%0, %1.d[0]
     [ ?r       , w ; f_mrc         , *         ] fmov\t%0, %d1
  }
  "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
  }
  [(set_attr "length" "4")]
)

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand")
        (vec_select:<VHALF>
          (match_operand:VQMOV_NO2E 1 "register_operand")
          (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half")))]
  "TARGET_FLOAT"
  {@ [ cons: =0 , 1 ; attrs: type   , arch ]
     [ w        , w ; neon_dup<q>   , simd ] dup\t%d0, %1.d[1]
     [ w        , w ; *             , sve  ] ext\t%Z0.b, %Z0.b, %Z0.b, #8
     [ ?r       , w ; neon_to_gp<q> , simd ] umov\t%0, %1.d[1]
     [ ?r       , w ; f_mrc         , *    ] fmov\t%0, %1.d[1]
  }
  [(set_attr "length" "4")]
)

(define_insn "iorn<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 "register_operand" "w"))
                   (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "andn<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 "register_operand" "w"))
                   (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode><vczle><vczbe>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (bitreverse:VB (match_operand:VB 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
{
  emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
  rtx op0_castsi2qi = force_subreg (<VS:VSI2QI>mode, operands[0],
                                    <MODE>mode, 0);
  emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
  emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
  DONE;
})

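;; There is no element-wide vector RBIT, so CTZ is computed as CLZ of the
;; bit-reversed input: reverse the bytes within each element, then the bits
;; within each byte.  For V4SI the sequence is (illustrative):
;;   rev32 v0.16b, v1.16b
;;   rbit  v0.16b, v0.16b
;;   clz   v0.4s, v0.4s
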
(define_expand "@xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
})

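;; xorsign (x, y) is x with its sign flipped wherever y is negative, i.e.
;; x * copysign (1.0, y) computed without a multiply.  A sketch of the
;; emitted logic, operating on the integer bit patterns of the vectors:
;;   op0 = x ^ (y & SIGN_MASK)
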
;; The fcadd and fcmla patterns are made UNSPEC explicitly because their
;; use needs to guarantee that the source vectors are contiguous.  It would
;; be wrong to describe the operation without being able to describe the
;; permute that is also required, but even if that is done the permute
;; would have been created as a LOAD_LANES which means the values in the
;; registers are in the wrong order.
(define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                       FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

(define_expand "cadd<rot><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                       (match_operand:VHSDF 2 "register_operand")]
                       FCADD))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
)

(define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")]
                                   FCMLA)
                    (match_operand:VHSDF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")
                                   (match_operand:SI 4 "const_int_operand" "n")]
                                   FCMLA)
                    (match_operand:VHSDF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
  {
    operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
    return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
  }
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
        (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
                                 (match_operand:V8HF 3 "register_operand" "w")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                                 FCMLA)
                   (match_operand:V4HF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
  {
    operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
    return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
  }
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
        (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
                                     (match_operand:<VHALF> 3 "register_operand" "w")
                                     (match_operand:SI 4 "const_int_operand" "n")]
                                     FCMLA)
                     (match_operand:VQ_HSF 1 "register_operand" "0")))]
  "TARGET_COMPLEX"
  {
    int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
    operands[4]
      = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
    return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
  }
  [(set_attr "type" "neon_fcmla")]
)

;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
  [(set (match_operand:VHSDF 0 "register_operand")
        (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                                   (match_operand:VHSDF 2 "register_operand")]
                                   FCMLA_OP)
                    (match_operand:VHSDF 3 "register_operand")))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
                                                 operands[2], operands[1]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
                                                 operands[2], operands[1]));
  DONE;
})

;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:VHSDF 0 "register_operand")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
                       (match_operand:VHSDF 2 "register_operand")]
                       FCMUL_OP))]
  "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
{
  rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
  rtx res1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
                                                 operands[2], operands[1]));
  emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
                                                 operands[2], operands[1]));
  DONE;
})

;; These expands map to the Dot Product optab the vectorizer checks for
;; and to the intrinsics pattern.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c[i] = a[i] * b[i];
;;     r += c[i];
;; }
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However the vectorizer can keep unrolling the loop
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_insn "<sur>dot_prod<mode><vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
                      (match_operand:<VSI2QI> 2 "register_operand" "w")]
                      DOTPROD)
          (match_operand:VS 3 "register_operand" "0")))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

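;; A single [us]dot accumulates four byte products into each 32-bit lane,
;; e.g. (illustrative):
;;   udot v0.4s, v1.16b, v2.16b
;; computes v0.s[i] += v1.b[4*i]*v2.b[4*i] + ... + v1.b[4*i+3]*v2.b[4*i+3].
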
;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
;; (vector) Dot Product operation and the vectorized optab.
(define_insn "usdot_prod<mode><vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
                      (match_operand:<VSI2QI> 2 "register_operand" "w")]
                      UNSPEC_USDOT)
          (match_operand:VS 3 "register_operand" "0")))]
  "TARGET_I8MM"
  "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                      (match_operand:V8QI 3 "register_operand" "<h_con>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                      DOTPROD)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                      (match_operand:V16QI 3 "register_operand" "<h_con>")
                      (match_operand:SI 4 "immediate_operand" "i")]
                      DOTPROD)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_DOTPROD"
  {
    operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<q>")]
)

;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
;; (by element) Dot Product operations.
(define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS
          (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
                      (match_operand:VB 3 "register_operand" "w")
                      (match_operand:SI 4 "immediate_operand" "i")]
                      DOTPROD_I8MM)
          (match_operand:VS 1 "register_operand" "0")))]
  "TARGET_I8MM"
  {
    int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
    int lane = INTVAL (operands[4]);
    operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
    return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
  }
  [(set_attr "type" "neon_dot<VS:q>")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "nonmemory_operand")]
  "TARGET_SIMD"
{
  machine_mode int_mode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (int_mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  /* copysign (x, -1) should instead be expanded as orr with the sign
     bit.  */
  if (!REG_P (operands[2]))
    {
      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
      if (GET_CODE (op2_elt) == CONST_DOUBLE
          && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
        {
          emit_insn (gen_ior<v_int_equiv>3 (
            lowpart_subreg (int_mode, operands[0], <MODE>mode),
            lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
          DONE;
        }
    }

  operands[2] = force_reg (<MODE>mode, operands[2]);
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
})

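;; With a register second operand the expansion is a single BSL on the
;; sign-bit mask; a sketch for V4SF:
;;   movi v3.4s, 0x80, lsl 24     // 0x80000000 in each lane
;;   bsl  v3.16b, v2.16b, v1.16b  // sign from op2, magnitude from op1
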
(define_insn "mul_lane<mode>3"
  [(set (match_operand:VMULD 0 "register_operand" "=w")
        (mult:VMULD
          (vec_duplicate:VMULD
            (vec_select:<VEL>
              (match_operand:<VCOND> 2 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
          (match_operand:VMULD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
    return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "mul_laneq<mode>3"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 3 "immediate_operand")])))
          (match_operand:VMUL 1 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
    return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "mul_n<mode>3"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 2 "register_operand" "<h_con>"))
          (match_operand:VMUL 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "aarch64_ursqrte<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                       UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode><vczle><vczbe>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
;; Whereas SABD would return 192 (-64 signed) on the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
(define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI
          (USMAX:VDQ_BHSI
            (match_operand:VDQ_BHSI 1 "register_operand" "w")
            (match_operand:VDQ_BHSI 2 "register_operand" "w"))
          (<max_opp>:VDQ_BHSI
            (match_dup 1)
            (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_expand "<su>abd<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand")
   (USMAX:VDQ_BHSI
     (match_operand:VDQ_BHSI 1 "register_operand")
     (match_operand:VDQ_BHSI 2 "register_operand"))]
  "TARGET_SIMD"
  {
    emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));
    DONE;
  }
)

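;; In C terms the expansion computes, per element (sketch):
;;   res[i] = max (a[i], b[i]) - min (a[i], b[i]);
;; which equals |a[i] - b[i]| without the signed-overflow pitfall
;; described above.
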
(define_insn "aarch64_<su>abdl<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (zero_extend:<VWIDE>
          (minus:VD_BHSI
            (USMAX:VD_BHSI
              (match_operand:VD_BHSI 1 "register_operand" "w")
              (match_operand:VD_BHSI 2 "register_operand" "w"))
            (<max_opp>:VD_BHSI
              (match_dup 1)
              (match_dup 2)))))]
  "TARGET_SIMD"
  "<su>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<su>abdl2<mode>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (zero_extend:<VDBLW>
          (minus:<VHALF>
            (USMAX:<VHALF>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3)))
            (<max_opp>:<VHALF>
              (vec_select:<VHALF>
                (match_dup 1)
                (match_dup 3))
              (vec_select:<VHALF>
                (match_dup 2)
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_expand "aarch64_<su>abdl2<mode>"
  [(match_operand:<VDBLW> 0 "register_operand")
   (USMAX:VQW
     (match_operand:VQW 1 "register_operand")
     (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[0], operands[1],
                                                 operands[2], hi));
    DONE;
  }
)

(define_insn "aarch64_<su>abdl<mode>_hi_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (abs:<VWIDE>
          (minus:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd_long")]
)

(define_insn "aarch64_<su>abdl<mode>_lo_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (abs:<VWIDE>
          (minus:<VWIDE>
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 1 "register_operand" "w")
                (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
            (ANY_EXTEND:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQW 2 "register_operand" "w")
                (match_dup 3))))))]
  "TARGET_SIMD"
  "<su>abdl\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_abd_long")]
)

(define_expand "vec_widen_<su>abd_hi_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[0], operands[1],
                                                       operands[2], p));
    DONE;
  }
)

(define_expand "vec_widen_<su>abd_lo_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
   (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
    emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[0], operands[1],
                                                       operands[2], p));
    DONE;
  }
)

(define_insn "aarch64_<su>abal<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (zero_extend:<VWIDE>
            (minus:VD_BHSI
              (USMAX:VD_BHSI
                (match_operand:VD_BHSI 2 "register_operand" "w")
                (match_operand:VD_BHSI 3 "register_operand" "w"))
              (<max_opp>:VD_BHSI
                (match_dup 2)
                (match_dup 3))))
          (match_operand:<VWIDE> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<su>abal2<mode>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (plus:<VDBLW>
          (zero_extend:<VDBLW>
            (minus:<VHALF>
              (USMAX:<VHALF>
                (vec_select:<VHALF>
                  (match_operand:VQW 2 "register_operand" "w")
                  (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
                (vec_select:<VHALF>
                  (match_operand:VQW 3 "register_operand" "w")
                  (match_dup 4)))
              (<max_opp>:<VHALF>
                (vec_select:<VHALF>
                  (match_dup 2)
                  (match_dup 4))
                (vec_select:<VHALF>
                  (match_dup 3)
                  (match_dup 4)))))
          (match_operand:<VDBLW> 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_expand "aarch64_<su>abal2<mode>"
  [(match_operand:<VDBLW> 0 "register_operand")
   (match_operand:<VDBLW> 1 "register_operand")
   (USMAX:VQW
     (match_operand:VQW 2 "register_operand")
     (match_operand:VQW 3 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
    emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
                                                 operands[2], operands[3], hi));
    DONE;
  }
)

(define_expand "aarch64_<su>adalp<mode>"
  [(set (match_operand:<VDBLW> 0 "register_operand")
        (plus:<VDBLW>
          (plus:<VDBLW>
            (vec_select:<VDBLW>
              (ANY_EXTEND:<V2XWIDE>
                (match_operand:VDQV_L 2 "register_operand"))
              (match_dup 3))
            (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
                                (match_dup 4)))
          (match_operand:<VDBLW> 1 "register_operand")))]
  "TARGET_SIMD"
  {
    int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
    operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
    operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
  }
)

(define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (plus:<VDBLW>
          (plus:<VDBLW>
            (vec_select:<VDBLW>
              (ANY_EXTEND:<V2XWIDE>
                (match_operand:VDQV_L 2 "register_operand" "w"))
              (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" ""))
            (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
                                (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" "")))
          (match_operand:<VDBLW> 1 "register_operand" "0")))]
  "TARGET_SIMD
   && !rtx_equal_p (operands[3], operands[4])"
  "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.8b, op2.8b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;;
;; For TARGET_DOTPROD we do:
;; MOV	tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
;; UABD	tmp2.16b, op1.16b, op2.16b
;; UDOT	op3.4s, tmp2.16b, tmp1.16b
;; MOV	op0, op3 // RA will tie the operands of UDOT appropriately.
;;
;; The signed version just uses the signed variants of the above instructions
;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
;; non-negative.

(define_expand "<su>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (USMAX:V16QI (match_operand:V16QI 1 "register_operand")
                (match_operand:V16QI 2 "register_operand"))
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
{
  if (TARGET_DOTPROD)
    {
      rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
      rtx abd = gen_reg_rtx (V16QImode);
      emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
      emit_insn (gen_udot_prodv4siv16qi (operands[0], abd, ones,
                                         operands[3]));
      DONE;
    }
  rtx reduc = gen_reg_rtx (V8HImode);
  emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[1],
                                         operands[2]));
  emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
                                       gen_lowpart (V8QImode, operands[1]),
                                       gen_lowpart (V8QImode,
                                                    operands[2])));
  emit_insn (gen_aarch64_<su>adalpv8hi (operands[3], operands[3], reduc));
  emit_move_insn (operands[0], operands[3]);
  DONE;
})

(define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (minus:VDQ_BHSI
                         (USMAX:VDQ_BHSI
                           (match_operand:VDQ_BHSI 2 "register_operand" "w")
                           (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                         (<max_opp>:VDQ_BHSI
                           (match_dup 2)
                           (match_dup 3)))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3<vczle><vczbe>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_and_imm")))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 , 2  ]
     [ w        , w , w  ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
     [ w        , 0 , Db ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm")))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 , 2  ]
     [ w        , w , w  ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
     [ w        , 0 , Do ] << aarch64_output_simd_orr_imm (operands[2], <bitsize>);
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For EOR (vector, register) and SVE EOR (vector, immediate)
(define_insn "xor<mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 , 2  ]
     [ w        , w , w  ] eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
     [ w        , 0 , De ] << aarch64_output_simd_xor_imm (operands[2], <bitsize>);
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "@aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
          (match_operand:VALL_F16 3 "register_operand" "0,0,0")
          (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    switch (which_alternative)
      {
      case 0:
        return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
      case 1:
        return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
      case 2:
        return "ld1\\t{%0.<Vetype>}[%p2], %1";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "aarch64_simd_vec_set_zero<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
          (match_operand:VALL_F16 3 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
    return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
  }
)

(define_insn "@aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
          (vec_duplicate:VALL_F16
            (vec_select:<VEL>
              (match_operand:VALL_F16 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "@aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
          (vec_duplicate:VALL_F16_NO_V2Q
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
              (parallel
                [(match_operand:SI 4 "immediate_operand" "i")])))
          (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
          (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_expand "signbit<mode>2"
  [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
   (use (match_operand:VDQSF 1 "register_operand"))]
  "TARGET_SIMD"
{
  int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
  rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                        shift_amount);
  operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);

  emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
                                                 shift_vector));
  DONE;
})

(define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm")))]
  "TARGET_SIMD"
  {@ [ cons: =0 , 1 , 2  ; attrs: type       ]
     [ w        , w , D1 ; neon_compare<q>   ] cmlt\t%0.<Vtype>, %1.<Vtype>, #0
     [ w        , w , Dr ; neon_shift_imm<q> ] sshr\t%0.<Vtype>, %1.<Vtype>, %2
  }
)

(define_insn "aarch64_<sra_op>sra_n<mode>_insn"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I
          (SHIFTRT:VDQ_I
            (match_operand:VDQ_I 2 "register_operand" "w")
            (match_operand:VDQ_I 3 "aarch64_simd_rshift_imm"))
          (match_operand:VDQ_I 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "<sra_op>sra\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

;; After all the combinations and propagations of ROTATE have been
;; attempted split any remaining vector rotates into SHL + USRA sequences.
;; Don't match this after reload as the various possible sequences for this
;; require temporary registers.
(define_insn_and_split "*aarch64_simd_rotate_imm<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=&w")
        (rotate:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm")))]
  "TARGET_SIMD && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(set (match_dup 3)
        (ashift:VDQ_I (match_dup 1)
                      (match_dup 2)))
   (set (match_dup 0)
        (plus:VDQ_I
          (lshiftrt:VDQ_I (match_dup 1)
                          (match_dup 4))
          (match_dup 3)))]
  {
    if (aarch64_emit_opt_vec_rotate (operands[0], operands[1], operands[2]))
      DONE;

    operands[3] = gen_reg_rtx (<MODE>mode);
    rtx shft_amnt = unwrap_const_vec_duplicate (operands[2]);
    int bitwidth = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
    operands[4]
      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                           bitwidth - INTVAL (shft_amnt));
  }
  [(set_attr "length" "8")]
)

(define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (plus:VSDQ_I_DI
          (truncate:VSDQ_I_DI
            (lshiftrt:<V2XWIDE>
              (plus:<V2XWIDE>
                (<SHIFTEXTEND>:<V2XWIDE>
                  (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
                (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
              (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
          (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
  "TARGET_SIMD
   && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
  "<sra_op>rsra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

(define_expand "aarch64_<sra_op>sra_n<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (plus:VDQ_I
          (SHIFTRT:VDQ_I
            (match_operand:VDQ_I 2 "register_operand")
            (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
          (match_operand:VDQ_I 1 "register_operand")))]
  "TARGET_SIMD"
  {
    operands[3]
      = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[3]));
  }
)

(define_expand "aarch64_<sra_op>rsra_n<mode>"
  [(match_operand:VSDQ_I_DI 0 "register_operand")
   (match_operand:VSDQ_I_DI 1 "register_operand")
   (SHIFTRT:VSDQ_I_DI
     (match_operand:VSDQ_I_DI 2 "register_operand")
     (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
  "TARGET_SIMD"
  {
    /* Use this expander to create the rounding constant vector, which is
       1 << (shift - 1).  Use wide_int here to ensure that the right TImode
       RTL is generated when handling the DImode expanders.  */
    int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
    wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
    rtx shft = gen_int_mode (INTVAL (operands[3]), DImode);
    rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
    if (VECTOR_MODE_P (<MODE>mode))
      {
        shft = gen_const_vec_duplicate (<MODE>mode, shft);
        rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
      }

    emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[0], operands[1],
                                                      operands[2], shft, rnd));
    DONE;
  }
)

(define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm")))]
  "TARGET_SIMD"
  {@ [ cons: =0, 1, 2   ; attrs: type       ]
     [ w       , w, vs1 ; neon_add<q>       ] add\t%0.<Vtype>, %1.<Vtype>, %1.<Vtype>
     [ w       , w, Dl  ; neon_shift_imm<q> ] shl\t%0.<Vtype>, %1.<Vtype>, %2
  }
)

(define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
                                                               operands[2],
                                                               0)));
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
  DONE;
})

(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                         convert_to_mode (<VEL>mode, tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      tmp1));
  DONE;
})

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:SI 2 "general_operand")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
    }

  operands[2] = force_reg (SImode, operands[2]);

  rtx tmp = gen_reg_rtx (SImode);
  rtx tmp1 = gen_reg_rtx (<MODE>mode);
  emit_insn (gen_negsi2 (tmp, operands[2]));
  emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
                                                                tmp, 0)));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    tmp1));
  DONE;
})

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

(define_expand "vashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})

(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

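;; For example, a shift of 64 is emitted as
;;   asr x0, x1, 63
;; since shifting in 64 copies of the sign bit gives the same value as
;; shifting in 63.
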
(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand")
   (match_operand:VDQ_I 1 "register_operand")
   (match_operand:VDQ_I 2 "register_operand")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand")
   (match_operand:DI 1 "register_operand")
   (match_operand:SI 2 "aarch64_shift_imm64_di")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode><vczle><vczbe>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)

(define_insn "aarch64_mla<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mla_n<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (match_operand:<VEL> 3 "register_operand" "<h_con>"))
            (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls_n<mode><vczle><vczbe>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (match_operand:<VEL> 3 "register_operand" "<h_con>"))
            (match_operand:VDQHS 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3<vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand")
        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
                     (match_operand:V2DI 2 "register_operand")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    default:
      gcc_unreachable ();
    }

  rtx mask = gen_reg_rtx (V2DImode);
  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vec_cmpv2div2di (mask, cmp_fmt, operands[1], operands[2]));
  emit_insn (gen_vcond_mask_v2div2di (operands[0], operands[1],
                                      operands[2], mask));
  DONE;
})

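;; There is no 64-bit integer SMAX/SMIN/UMAX/UMIN instruction, so e.g.
;; smaxv2di3 becomes a compare and a select (illustrative):
;;   cmgt v3.2d, v1.2d, v2.2d
;;   bsl  v3.16b, v1.16b, v2.16b
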
;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                         MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

1816 ;; vec_concat gives a new vector with the low elements from operand 1, and
1817 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1818 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1819 ;; What that means, is that the RTL descriptions of the below patterns
1820 ;; need to change depending on endianness.
1822 ;; Narrowing operations.
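;; The xtn2 patterns below implement the ACLE vmovn_high family; as a
;; rough C model of the little-endian case (illustrative only):
;;
;;   #include <arm_neon.h>
;;   int16x8_t
;;   narrow_high (int16x4_t lo, int32x4_t a)
;;   {
;;     /* The low half keeps LO; the high half is A narrowed.  The RTL
;;        vec_concat operand order is reversed for big-endian.  */
;;     return vmovn_high_s32 (lo, a);
;;   }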
1824 (define_insn "aarch64_xtn2<mode>_insn_le"
1825 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1826 (vec_concat:<VNARROWQ2>
1827 (match_operand:<VNARROWQ> 1 "register_operand" "0")
1828 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1829 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1830 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1831 [(set_attr "type" "neon_move_narrow_q")]
1834 (define_insn "aarch64_xtn2<mode>_insn_be"
1835 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1836 (vec_concat:<VNARROWQ2>
1837 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1838 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1839 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1840 "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1841 [(set_attr "type" "neon_move_narrow_q")]
1844 (define_expand "aarch64_xtn2<mode>"
1845 [(match_operand:<VNARROWQ2> 0 "register_operand")
1846 (match_operand:<VNARROWQ> 1 "register_operand")
1847 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1850 if (BYTES_BIG_ENDIAN)
1851 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1854 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1860 (define_insn "*aarch64_narrow_trunc<mode>"
1861 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1862 (vec_concat:<VNARROWQ2>
1863 (truncate:<VNARROWQ>
1864 (match_operand:VQN 1 "register_operand" "w"))
1865 (truncate:<VNARROWQ>
1866 (match_operand:VQN 2 "register_operand" "w"))))]
1869 if (!BYTES_BIG_ENDIAN)
1870 return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1872 return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1874 [(set_attr "type" "neon_permute<q>")]
1877 (define_insn "*aarch64_trunc_concat<mode>"
1878 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1879 (truncate:<VNARROWQ>
1881 (match_operand:<VHALF> 1 "register_operand" "w")
1882 (match_operand:<VHALF> 2 "register_operand" "w"))))]
1885 if (!BYTES_BIG_ENDIAN)
1886 return "uzp1\\t%0.<Vntype>, %1.<Vntype>, %2.<Vntype>";
1888 return "uzp1\\t%0.<Vntype>, %2.<Vntype>, %1.<Vntype>";
1890 [(set_attr "type" "neon_permute<q>")]
1895 (define_expand "vec_pack_trunc_<mode>"
1896 [(match_operand:<VNARROWD> 0 "register_operand")
1897 (match_operand:VDN 1 "general_operand")
1898 (match_operand:VDN 2 "general_operand")]
1901 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1902 emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
1903 emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
1909 (define_expand "vec_pack_trunc_<mode>"
1910 [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1911 (vec_concat:<VNARROWQ2>
1912 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1913 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1916 rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1917 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1918 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1920 emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1922 if (BYTES_BIG_ENDIAN)
1923 emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1926 emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
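;; Illustration: vec_pack_trunc_<mode> concatenates the truncations of
;; its two inputs, e.g. for V4SI -> V8HI a rough scalar model (function
;; name is ours) is:
;;
;;   #include <stdint.h>
;;   static void
;;   pack_trunc (int16_t r[8], const int32_t a[4], const int32_t b[4])
;;   {
;;     for (int i = 0; i < 4; i++)
;;       {
;;         r[i] = (int16_t) a[i];      /* xtn  */
;;         r[i + 4] = (int16_t) b[i];  /* xtn2 */
;;       }
;;   }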
1932 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1933 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1934 (vec_concat:<VNARROWQ2>
1935 (truncate:<VNARROWQ>
1936 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1937 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1938 (truncate:<VNARROWQ>
1939 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1941 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1942 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1943 [(set_attr "type" "neon_permute<q>")]
1946 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1947 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1948 (vec_concat:<VNARROWQ2>
1949 (truncate:<VNARROWQ>
1950 (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1951 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1952 (truncate:<VNARROWQ>
1953 (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1955 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1956 "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1957 [(set_attr "type" "neon_permute<q>")]
1960 ;; Widening operations.
1962 (define_insn_and_split "aarch64_simd_vec_unpack<su>_hi_<mode>"
1963 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1964 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1965 (match_operand:VQW 1 "register_operand" "w")
1966 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1969 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1970 "&& <CODE> == ZERO_EXTEND
1971 && aarch64_split_simd_shift_p (insn)"
1974 /* On many cores, it is cheaper to implement UXTL2 using a ZIP2 with zero,
1975 provided that the cost of the zero can be amortized over several
1976 operations. We'll later recombine the zero and zip if there are
1977 not sufficient uses of the zero to make the split worthwhile. */
1978 rtx res = simplify_gen_subreg (<MODE>mode, operands[0], <VWIDE>mode, 0);
1979 rtx zero = aarch64_gen_shareable_zero (<MODE>mode);
1980 emit_insn (gen_aarch64_zip2<mode> (res, operands[1], zero));
1983 [(set_attr "type" "neon_shift_imm_long")]
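;; Illustration of the split above (names ours): on little-endian,
;; interleaving the high half of the input with zeroes produces the
;; same byte image as zero-extending it, so UXTL2 can become a ZIP2
;; against a shared zero.  A rough C model for bytes:
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   static void
;;   uxtl2_v16qi (uint16_t r[8], const uint8_t a[16])
;;   {
;;     uint8_t zipped[16];
;;     for (int i = 0; i < 8; i++)
;;       {
;;         zipped[2 * i] = a[8 + i];   /* zip2 with a zero vector */
;;         zipped[2 * i + 1] = 0;
;;       }
;;     memcpy (r, zipped, 16);         /* same byte image as uxtl2 */
;;   }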
1986 (define_expand "vec_unpack<su>_hi_<mode>"
1987 [(match_operand:<VWIDE> 0 "register_operand")
1988 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1991 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1992 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1998 (define_expand "vec_unpack<su>_lo_<mode>"
1999 [(set (match_operand:<VWIDE> 0 "register_operand")
2000 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand")))]
2003 operands[1] = lowpart_subreg (<VHALF>mode, operands[1], <MODE>mode);
2007 ;; Widening arithmetic.
2009 (define_insn "*aarch64_<su>mlal_lo<mode>"
2010 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2013 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2014 (match_operand:VQW 2 "register_operand" "w")
2015 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2016 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2017 (match_operand:VQW 4 "register_operand" "w")
2019 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2021 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2022 [(set_attr "type" "neon_mla_<Vetype>_long")]
2025 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2026 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2029 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2030 (match_operand:VQW 2 "register_operand" "w")
2031 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2032 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2033 (match_operand:VQW 4 "register_operand" "w")
2035 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2037 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2038 [(set_attr "type" "neon_mla_<Vetype>_long")]
2041 (define_expand "aarch64_<su>mlal_hi<mode>"
2042 [(match_operand:<VWIDE> 0 "register_operand")
2043 (match_operand:<VWIDE> 1 "register_operand")
2044 (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2045 (match_operand:VQW 3 "register_operand")]
2048 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2049 emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2050 operands[2], p, operands[3]));
2055 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2056 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2061 (match_operand:VQ_HSI 2 "register_operand" "w")
2062 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2063 (vec_duplicate:<VWIDE>
2064 (ANY_EXTEND:<VWIDE_S>
2065 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2066 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2068 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2069 [(set_attr "type" "neon_mla_<Vetype>_long")]
2072 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2073 [(match_operand:<VWIDE> 0 "register_operand")
2074 (match_operand:<VWIDE> 1 "register_operand")
2075 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2076 (match_operand:<VEL> 3 "register_operand")]
2079 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2080 emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2081 operands[1], operands[2], p, operands[3]));
2086 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2087 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2089 (match_operand:<VWIDE> 1 "register_operand" "0")
2091 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2092 (match_operand:VQW 2 "register_operand" "w")
2093 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2094 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2095 (match_operand:VQW 4 "register_operand" "w")
2098 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2099 [(set_attr "type" "neon_mla_<Vetype>_long")]
2102 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2103 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2105 (match_operand:<VWIDE> 1 "register_operand" "0")
2107 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2108 (match_operand:VQW 2 "register_operand" "w")
2109 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2110 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2111 (match_operand:VQW 4 "register_operand" "w")
2114 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2115 [(set_attr "type" "neon_mla_<Vetype>_long")]
2118 (define_expand "aarch64_<su>mlsl_hi<mode>"
2119 [(match_operand:<VWIDE> 0 "register_operand")
2120 (match_operand:<VWIDE> 1 "register_operand")
2121 (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2122 (match_operand:VQW 3 "register_operand")]
2125 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2126 emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2127 operands[2], p, operands[3]));
2132 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2133 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2135 (match_operand:<VWIDE> 1 "register_operand" "0")
2139 (match_operand:VQ_HSI 2 "register_operand" "w")
2140 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2141 (vec_duplicate:<VWIDE>
2142 (ANY_EXTEND:<VWIDE_S>
2143 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2145 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2146 [(set_attr "type" "neon_mla_<Vetype>_long")]
2149 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2150 [(match_operand:<VWIDE> 0 "register_operand")
2151 (match_operand:<VWIDE> 1 "register_operand")
2152 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2153 (match_operand:<VEL> 3 "register_operand")]
2156 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2157 emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2158 operands[1], operands[2], p, operands[3]));
2163 (define_insn "aarch64_<su>mlal<mode>"
2164 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2168 (match_operand:VD_BHSI 2 "register_operand" "w"))
2170 (match_operand:VD_BHSI 3 "register_operand" "w")))
2171 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2173 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2174 [(set_attr "type" "neon_mla_<Vetype>_long")]
2177 (define_insn "aarch64_<su>mlal_n<mode>"
2178 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2182 (match_operand:VD_HSI 2 "register_operand" "w"))
2183 (vec_duplicate:<VWIDE>
2184 (ANY_EXTEND:<VWIDE_S>
2185 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2186 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2188 "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2189 [(set_attr "type" "neon_mla_<Vetype>_long")]
2192 (define_insn "aarch64_<su>mlsl<mode>"
2193 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2195 (match_operand:<VWIDE> 1 "register_operand" "0")
2198 (match_operand:VD_BHSI 2 "register_operand" "w"))
2200 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2202 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2203 [(set_attr "type" "neon_mla_<Vetype>_long")]
2206 (define_insn "aarch64_<su>mlsl_n<mode>"
2207 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2209 (match_operand:<VWIDE> 1 "register_operand" "0")
2212 (match_operand:VD_HSI 2 "register_operand" "w"))
2213 (vec_duplicate:<VWIDE>
2214 (ANY_EXTEND:<VWIDE_S>
2215 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2217 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2218 [(set_attr "type" "neon_mla_<Vetype>_long")]
2221 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2222 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2223 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2224 (match_operand:VQW 1 "register_operand" "w")
2225 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2226 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2227 (match_operand:VQW 2 "register_operand" "w")
2230 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2231 [(set_attr "type" "neon_mul_<Vetype>_long")]
2234 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2235 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2236 (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2237 (match_operand:VD_BHSI 1 "register_operand" "w"))
2239 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2241 "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2242 [(set_attr "type" "neon_mul_<Vetype>_long")]
2245 (define_expand "vec_widen_<su>mult_lo_<mode>"
2246 [(match_operand:<VWIDE> 0 "register_operand")
2247 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2248 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2251 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2252 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2259 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2260 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2261 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2262 (match_operand:VQW 1 "register_operand" "w")
2263 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2264 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2265 (match_operand:VQW 2 "register_operand" "w")
2268 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2269 [(set_attr "type" "neon_mul_<Vetype>_long")]
2272 (define_expand "vec_widen_<su>mult_hi_<mode>"
2273 [(match_operand:<VWIDE> 0 "register_operand")
2274 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2275 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2278 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2279 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2287 ;; vmull_lane_s16 intrinsics
2288 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2289 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2292 (match_operand:<VCOND> 1 "register_operand" "w"))
2293 (vec_duplicate:<VWIDE>
2294 (ANY_EXTEND:<VWIDE_S>
2296 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2297 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2300 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2301 return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2303 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
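;; Illustration: the lane forms above multiply every element of the
;; first input by a single element of the second, widening the result;
;; roughly, via the corresponding ACLE intrinsic:
;;
;;   #include <arm_neon.h>
;;   int32x4_t
;;   mull_by_lane (int16x4_t a, int16x4_t v)
;;   {
;;     return vmull_lane_s16 (a, v, 1);  /* r[i] = (int32_t) a[i] * v[1] */
;;   }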
2306 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2307 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2311 (match_operand:VQ_HSI 1 "register_operand" "w")
2312 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2313 (vec_duplicate:<VWIDE>
2314 (ANY_EXTEND:<VWIDE_S>
2316 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2317 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2320 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2321 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2323 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2326 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2327 [(match_operand:<VWIDE> 0 "register_operand")
2328 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2329 (match_operand:<VCOND> 2 "register_operand")
2330 (match_operand:SI 3 "immediate_operand")]
2333 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2334 emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2335 operands[1], p, operands[2], operands[3]));
2340 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2341 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2345 (match_operand:VQ_HSI 1 "register_operand" "w")
2346 (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2347 (vec_duplicate:<VWIDE>
2348 (ANY_EXTEND:<VWIDE_S>
2350 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2351 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2354 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2355 return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2357 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2360 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2361 [(match_operand:<VWIDE> 0 "register_operand")
2362 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2363 (match_operand:<VCONQ> 2 "register_operand")
2364 (match_operand:SI 3 "immediate_operand")]
2367 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2368 emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2369 operands[1], p, operands[2], operands[3]));
2374 (define_insn "aarch64_<su>mull_n<mode>"
2375 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2378 (match_operand:VD_HSI 1 "register_operand" "w"))
2379 (vec_duplicate:<VWIDE>
2380 (ANY_EXTEND:<VWIDE_S>
2381 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2383 "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2384 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2387 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2388 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2392 (match_operand:VQ_HSI 1 "register_operand" "w")
2393 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2394 (vec_duplicate:<VWIDE>
2395 (ANY_EXTEND:<VWIDE_S>
2396 (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2398 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2399 [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2402 (define_expand "aarch64_<su>mull_hi_n<mode>"
2403 [(match_operand:<VWIDE> 0 "register_operand")
2404 (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2405 (match_operand:<VEL> 2 "register_operand")]
2408 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2409 emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2415 ;; vmlal_lane_s16 intrinsics
2416 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2417 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2421 (match_operand:<VCOND> 2 "register_operand" "w"))
2422 (vec_duplicate:<VWIDE>
2423 (ANY_EXTEND:<VWIDE_S>
2425 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2426 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2427 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2430 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2431 return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2433 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2436 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2437 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2442 (match_operand:VQ_HSI 2 "register_operand" "w")
2443 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2444 (vec_duplicate:<VWIDE>
2445 (ANY_EXTEND:<VWIDE_S>
2447 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2448 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2449 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2452 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2453 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2455 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2458 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2459 [(match_operand:<VWIDE> 0 "register_operand")
2460 (match_operand:<VWIDE> 1 "register_operand")
2461 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2462 (match_operand:<VCOND> 3 "register_operand")
2463 (match_operand:SI 4 "immediate_operand")]
2466 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2467 emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2468 operands[1], operands[2], p, operands[3], operands[4]));
2473 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2474 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2479 (match_operand:VQ_HSI 2 "register_operand" "w")
2480 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2481 (vec_duplicate:<VWIDE>
2482 (ANY_EXTEND:<VWIDE_S>
2484 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2485 (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2486 (match_operand:<VWIDE> 1 "register_operand" "0")))]
2489 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2490 return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2492 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2495 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2496 [(match_operand:<VWIDE> 0 "register_operand")
2497 (match_operand:<VWIDE> 1 "register_operand")
2498 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2499 (match_operand:<VCONQ> 3 "register_operand")
2500 (match_operand:SI 4 "immediate_operand")]
2503 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2504 emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2505 operands[1], operands[2], p, operands[3], operands[4]));
2510 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2511 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2513 (match_operand:<VWIDE> 1 "register_operand" "0")
2516 (match_operand:<VCOND> 2 "register_operand" "w"))
2517 (vec_duplicate:<VWIDE>
2518 (ANY_EXTEND:<VWIDE_S>
2520 (match_operand:VDQHS 3 "register_operand" "<vwx>")
2521 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2524 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2525 return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2527 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2530 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2531 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2533 (match_operand:<VWIDE> 1 "register_operand" "0")
2537 (match_operand:VQ_HSI 2 "register_operand" "w")
2538 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2539 (vec_duplicate:<VWIDE>
2540 (ANY_EXTEND:<VWIDE_S>
2542 (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2543 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2547 operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2548 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2550 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2553 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2554 [(match_operand:<VWIDE> 0 "register_operand")
2555 (match_operand:<VWIDE> 1 "register_operand")
2556 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2557 (match_operand:<VCOND> 3 "register_operand")
2558 (match_operand:SI 4 "immediate_operand")]
2561 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2562 emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2563 operands[1], operands[2], p, operands[3], operands[4]));
2568 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2569 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2571 (match_operand:<VWIDE> 1 "register_operand" "0")
2575 (match_operand:VQ_HSI 2 "register_operand" "w")
2576 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2577 (vec_duplicate:<VWIDE>
2578 (ANY_EXTEND:<VWIDE_S>
2580 (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2581 (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2585 operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2586 return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2588 [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2591 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2592 [(match_operand:<VWIDE> 0 "register_operand")
2593 (match_operand:<VWIDE> 1 "register_operand")
2594 (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2595 (match_operand:<VCONQ> 3 "register_operand")
2596 (match_operand:SI 4 "immediate_operand")]
2599 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2600 emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2601 operands[1], operands[2], p, operands[3], operands[4]));
2606 ;; FP vector operations.
2607 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
2608 ;; double-precision (64-bit) floating-point data types and arithmetic as
2609 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
2610 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2612 ;; Floating-point operations can raise an exception. Vectorising such
2613 ;; operations is safe for the reasons explained below.
2615 ;; ARMv8 permits an extension to enable trapped floating-point
2616 ;; exception handling; however, this is an optional feature. In the
2617 ;; event of a floating-point exception being raised by vectorised
2618 ;; code, one of the following occurs:
2619 ;; 1. If trapped floating-point exceptions are available, then a trap
2620 ;; will be taken when any lane raises an enabled exception. A trap
2621 ;; handler may determine which lane raised the exception.
2622 ;; 2. Alternatively a sticky exception flag is set in the
2623 ;; floating-point status register (FPSR). Software may explicitly
2624 ;; test the exception flags, in which case the tests will either
2625 ;; prevent vectorisation, allowing precise identification of the
2626 ;; failing operation; or, if the flags are tested outside vectorisable
2627 ;; regions, the specific operation and lane are not of interest.
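;; As an illustration of scenario 2, software tests the sticky flags
;; through fenv.h; a minimal sketch (names ours):
;;
;;   #include <fenv.h>
;;   #pragma STDC FENV_ACCESS ON
;;   int
;;   saw_invalid (void)
;;   {
;;     feclearexcept (FE_INVALID);
;;     /* ... floating-point work, possibly vectorised ... */
;;     return fetestexcept (FE_INVALID) != 0;
;;   }
;;
;; Flag tests inside a candidate loop prevent its vectorisation; tests
;; outside it only observe the sticky, lane-less result.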
2629 ;; FP arithmetic operations.
2631 (define_insn "add<mode>3<vczle><vczbe>"
2632 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2633 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2634 (match_operand:VHSDF 2 "register_operand" "w")))]
2636 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2637 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2640 (define_insn "sub<mode>3<vczle><vczbe>"
2641 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2642 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2643 (match_operand:VHSDF 2 "register_operand" "w")))]
2645 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2646 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2649 (define_insn "mul<mode>3<vczle><vczbe>"
2650 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2651 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2652 (match_operand:VHSDF 2 "register_operand" "w")))]
2654 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2655 [(set_attr "type" "neon_fp_mul_<stype><q>")]
2658 (define_expand "div<mode>3"
2659 [(set (match_operand:VHSDF 0 "register_operand")
2660 (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2661 (match_operand:VHSDF 2 "register_operand")))]
2664 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2667 operands[1] = force_reg (<MODE>mode, operands[1]);
2670 (define_insn "*div<mode>3<vczle><vczbe>"
2671 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2672 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2673 (match_operand:VHSDF 2 "register_operand" "w")))]
2675 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2676 [(set_attr "type" "neon_fp_div_<stype><q>")]
2679 (define_insn "neg<mode>2<vczle><vczbe>"
2680 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2681 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2683 "fneg\\t%0.<Vtype>, %1.<Vtype>"
2684 [(set_attr "type" "neon_fp_neg_<stype><q>")]
2687 (define_insn "aarch64_fnegv2di2<vczle><vczbe>"
2688 [(set (match_operand:V2DI 0 "register_operand" "=w")
2689 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
2692 "fneg\\t%0.2d, %1.2d"
2693 [(set_attr "type" "neon_fp_neg_d")]
2696 (define_insn "abs<mode>2<vczle><vczbe>"
2697 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2698 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2700 "fabs\\t%0.<Vtype>, %1.<Vtype>"
2701 [(set_attr "type" "neon_fp_abs_<stype><q>")]
2704 (define_expand "aarch64_float_mla<mode>"
2705 [(set (match_operand:VDQF_DF 0 "register_operand")
2708 (match_operand:VDQF_DF 2 "register_operand")
2709 (match_operand:VDQF_DF 3 "register_operand"))
2710 (match_operand:VDQF_DF 1 "register_operand")))]
2713 rtx scratch = gen_reg_rtx (<MODE>mode);
2714 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2715 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
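;; Note that the expansion above deliberately emits a separate multiply
;; and add, with an intermediate rounding step, rather than a fused
;; fma; this matches the semantics of the non-fused vmla intrinsics,
;; roughly:
;;
;;   #include <arm_neon.h>
;;   float32x4_t
;;   mla (float32x4_t acc, float32x4_t a, float32x4_t b)
;;   {
;;     return vmlaq_f32 (acc, a, b);  /* acc + a * b, not fused */
;;   }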
2720 (define_expand "aarch64_float_mls<mode>"
2721 [(set (match_operand:VDQF_DF 0 "register_operand")
2723 (match_operand:VDQF_DF 1 "register_operand")
2725 (match_operand:VDQF_DF 2 "register_operand")
2726 (match_operand:VDQF_DF 3 "register_operand"))))]
2729 rtx scratch = gen_reg_rtx (<MODE>mode);
2730 emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2731 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2736 (define_expand "aarch64_float_mla_n<mode>"
2737 [(set (match_operand:VDQSF 0 "register_operand")
2740 (vec_duplicate:VDQSF
2741 (match_operand:<VEL> 3 "register_operand"))
2742 (match_operand:VDQSF 2 "register_operand"))
2743 (match_operand:VDQSF 1 "register_operand")))]
2746 rtx scratch = gen_reg_rtx (<MODE>mode);
2747 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2748 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2753 (define_expand "aarch64_float_mls_n<mode>"
2754 [(set (match_operand:VDQSF 0 "register_operand")
2756 (match_operand:VDQSF 1 "register_operand")
2758 (vec_duplicate:VDQSF
2759 (match_operand:<VEL> 3 "register_operand"))
2760 (match_operand:VDQSF 2 "register_operand"))))]
2763 rtx scratch = gen_reg_rtx (<MODE>mode);
2764 emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2765 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2770 (define_expand "aarch64_float_mla_lane<mode>"
2771 [(set (match_operand:VDQSF 0 "register_operand")
2774 (vec_duplicate:VDQSF
2776 (match_operand:V2SF 3 "register_operand")
2777 (parallel [(match_operand:SI 4 "immediate_operand")])))
2778 (match_operand:VDQSF 2 "register_operand"))
2779 (match_operand:VDQSF 1 "register_operand")))]
2782 rtx scratch = gen_reg_rtx (<MODE>mode);
2783 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2784 operands[3], operands[4]));
2785 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2790 (define_expand "aarch64_float_mls_lane<mode>"
2791 [(set (match_operand:VDQSF 0 "register_operand")
2793 (match_operand:VDQSF 1 "register_operand")
2795 (vec_duplicate:VDQSF
2797 (match_operand:V2SF 3 "register_operand")
2798 (parallel [(match_operand:SI 4 "immediate_operand")])))
2799 (match_operand:VDQSF 2 "register_operand"))))]
2802 rtx scratch = gen_reg_rtx (<MODE>mode);
2803 emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2804 operands[3], operands[4]));
2805 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2810 (define_expand "aarch64_float_mla_laneq<mode>"
2811 [(set (match_operand:VDQSF 0 "register_operand")
2814 (vec_duplicate:VDQSF
2816 (match_operand:V4SF 3 "register_operand")
2817 (parallel [(match_operand:SI 4 "immediate_operand")])))
2818 (match_operand:VDQSF 2 "register_operand"))
2819 (match_operand:VDQSF 1 "register_operand")))]
2822 rtx scratch = gen_reg_rtx (<MODE>mode);
2823 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2824 operands[3], operands[4]));
2825 emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2830 (define_expand "aarch64_float_mls_laneq<mode>"
2831 [(set (match_operand:VDQSF 0 "register_operand")
2833 (match_operand:VDQSF 1 "register_operand")
2835 (vec_duplicate:VDQSF
2837 (match_operand:V4SF 3 "register_operand")
2838 (parallel [(match_operand:SI 4 "immediate_operand")])))
2839 (match_operand:VDQSF 2 "register_operand"))))]
2842 rtx scratch = gen_reg_rtx (<MODE>mode);
2843 emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2844 operands[3], operands[4]));
2845 emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2850 (define_insn "fma<mode>4<vczle><vczbe>"
2851 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2852 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2853 (match_operand:VHSDF 2 "register_operand" "w")
2854 (match_operand:VHSDF 3 "register_operand" "0")))]
2856 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2857 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2860 (define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
2861 [(set (match_operand:VDQF 0 "register_operand" "=w")
2865 (match_operand:VDQF 1 "register_operand" "<h_con>")
2866 (parallel [(match_operand:SI 2 "immediate_operand")])))
2867 (match_operand:VDQF 3 "register_operand" "w")
2868 (match_operand:VDQF 4 "register_operand" "0")))]
2871 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2872 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2874 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2877 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2878 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2880 (vec_duplicate:VDQSF
2882 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2883 (parallel [(match_operand:SI 2 "immediate_operand")])))
2884 (match_operand:VDQSF 3 "register_operand" "w")
2885 (match_operand:VDQSF 4 "register_operand" "0")))]
2888 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2889 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2891 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2894 (define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
2895 [(set (match_operand:VMUL 0 "register_operand" "=w")
2898 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2899 (match_operand:VMUL 2 "register_operand" "w")
2900 (match_operand:VMUL 3 "register_operand" "0")))]
2902 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2903 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2906 (define_insn "*aarch64_fma4_elt_to_64v2df"
2907 [(set (match_operand:DF 0 "register_operand" "=w")
2910 (match_operand:V2DF 1 "register_operand" "w")
2911 (parallel [(match_operand:SI 2 "immediate_operand")]))
2912 (match_operand:DF 3 "register_operand" "w")
2913 (match_operand:DF 4 "register_operand" "0")))]
2916 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2917 return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2919 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2922 (define_insn "fnma<mode>4<vczle><vczbe>"
2923 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2925 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2926 (match_operand:VHSDF 2 "register_operand" "w")
2927 (match_operand:VHSDF 3 "register_operand" "0")))]
2929 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2930 [(set_attr "type" "neon_fp_mla_<stype><q>")]
2933 (define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
2934 [(set (match_operand:VDQF 0 "register_operand" "=w")
2937 (match_operand:VDQF 3 "register_operand" "w"))
2940 (match_operand:VDQF 1 "register_operand" "<h_con>")
2941 (parallel [(match_operand:SI 2 "immediate_operand")])))
2942 (match_operand:VDQF 4 "register_operand" "0")))]
2945 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2946 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2948 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2951 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2952 [(set (match_operand:VDQSF 0 "register_operand" "=w")
2955 (match_operand:VDQSF 3 "register_operand" "w"))
2956 (vec_duplicate:VDQSF
2958 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2959 (parallel [(match_operand:SI 2 "immediate_operand")])))
2960 (match_operand:VDQSF 4 "register_operand" "0")))]
2963 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2964 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2966 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2969 (define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
2970 [(set (match_operand:VMUL 0 "register_operand" "=w")
2973 (match_operand:VMUL 2 "register_operand" "w"))
2975 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2976 (match_operand:VMUL 3 "register_operand" "0")))]
2978 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2979 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2982 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2983 [(set (match_operand:DF 0 "register_operand" "=w")
2986 (match_operand:V2DF 1 "register_operand" "w")
2987 (parallel [(match_operand:SI 2 "immediate_operand")]))
2989 (match_operand:DF 3 "register_operand" "w"))
2990 (match_operand:DF 4 "register_operand" "0")))]
2993 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2994 return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2996 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2999 ;; Vector versions of the floating-point frint patterns.
3000 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
3001 (define_insn "<frint_pattern><mode>2<vczle><vczbe>"
3002 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3003 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3006 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
3007 [(set_attr "type" "neon_fp_round_<stype><q>")]
3010 ;; Vector versions of the fcvt standard patterns.
3011 ;; Expands to lbtrunc, lround, lceil, lfloor
3012 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
3013 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3014 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3015 [(match_operand:VHSDF 1 "register_operand" "w")]
3018 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
3019 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
3022 ;; HF Scalar variants of related SIMD instructions.
3023 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3024 [(set (match_operand:HI 0 "register_operand" "=w")
3025 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3027 "TARGET_SIMD_F16INST"
3028 "fcvt<frint_suffix><su>\t%h0, %h1"
3029 [(set_attr "type" "neon_fp_to_int_s")]
3032 (define_insn "<optab>_trunchfhi2"
3033 [(set (match_operand:HI 0 "register_operand" "=w")
3034 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3035 "TARGET_SIMD_F16INST"
3036 "fcvtz<su>\t%h0, %h1"
3037 [(set_attr "type" "neon_fp_to_int_s")]
3040 (define_insn "<optab>hihf2"
3041 [(set (match_operand:HF 0 "register_operand" "=w")
3042 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3043 "TARGET_SIMD_F16INST"
3044 "<su_optab>cvtf\t%h0, %h1"
3045 [(set_attr "type" "neon_int_to_fp_s")]
3048 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3049 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3050 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3052 (match_operand:VDQF 1 "register_operand" "w")
3053 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3056 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3057 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3059 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3060 char buf[64];
3061 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3062 output_asm_insn (buf, operands);
3065 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
3068 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3069 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3070 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3071 [(match_operand:VHSDF 1 "register_operand")]
3076 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3077 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3078 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3079 [(match_operand:VHSDF 1 "register_operand")]
3084 (define_expand "ftrunc<VHSDF:mode>2"
3085 [(set (match_operand:VHSDF 0 "register_operand")
3086 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3091 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3092 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3094 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3096 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3097 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3100 ;; Conversions between vectors of floats and doubles.
3101 ;; Contains a mix of patterns to match standard pattern names
3102 ;; and those for intrinsics.
3104 ;; Float widening operations.
3106 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3108 (float_extend:<VWIDE> (vec_select:<VHALF>
3109 (match_operand:VQ_HSF 1 "register_operand" "w")
3110 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3113 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3114 [(set_attr "type" "neon_fp_cvt_widen_s")]
3117 ;; Convert between fixed-point and floating-point (vector modes)
3119 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3120 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3121 (unspec:<VHSDF:FCVT_TARGET>
3122 [(match_operand:VHSDF 1 "register_operand" "w")
3123 (match_operand:SI 2 "immediate_operand" "i")]
3126 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3127 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
3130 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3131 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3132 (unspec:<VDQ_HSDI:FCVT_TARGET>
3133 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3134 (match_operand:SI 2 "immediate_operand" "i")]
3137 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3138 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3141 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3142 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3143 ;; the meaning of HI and LO changes depending on the target endianness.
3144 ;; While elsewhere we map the higher numbered elements of a vector to
3145 ;; the lower architectural lanes of the vector, for these patterns we want
3146 ;; to always treat "hi" as referring to the higher architectural lanes.
3147 ;; Consequently, while the patterns below look inconsistent with our
3148 ;; other big-endian patterns, their behaviour is as required.
3150 (define_expand "vec_unpacks_lo_<mode>"
3151 [(match_operand:<VWIDE> 0 "register_operand")
3152 (match_operand:VQ_HSF 1 "register_operand")]
3155 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3156 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3162 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3163 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3164 (float_extend:<VWIDE> (vec_select:<VHALF>
3165 (match_operand:VQ_HSF 1 "register_operand" "w")
3166 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3169 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3170 [(set_attr "type" "neon_fp_cvt_widen_s")]
3173 (define_expand "vec_unpacks_hi_<mode>"
3174 [(match_operand:<VWIDE> 0 "register_operand")
3175 (match_operand:VQ_HSF 1 "register_operand")]
3178 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3184 (define_insn "extend<mode><Vwide>2"
3185 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3186 (float_extend:<VWIDE>
3187 (match_operand:VDF 1 "register_operand" "w")))]
3189 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3190 [(set_attr "type" "neon_fp_cvt_widen_s")]
3193 ;; Float narrowing operations.
3195 (define_insn "aarch64_float_trunc_rodd_df"
3196 [(set (match_operand:SF 0 "register_operand" "=w")
3197 (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3201 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3204 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3205 [(set (match_operand:V2SF 0 "register_operand" "=w")
3206 (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3209 "fcvtxn\\t%0.2s, %1.2d"
3210 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3213 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3214 [(set (match_operand:V4SF 0 "register_operand" "=w")
3216 (match_operand:V2SF 1 "register_operand" "0")
3217 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3219 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3220 "fcvtxn2\\t%0.4s, %2.2d"
3221 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3224 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3225 [(set (match_operand:V4SF 0 "register_operand" "=w")
3227 (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3229 (match_operand:V2SF 1 "register_operand" "0")))]
3230 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3231 "fcvtxn2\\t%0.4s, %2.2d"
3232 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3235 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3236 [(match_operand:V4SF 0 "register_operand")
3237 (match_operand:V2SF 1 "register_operand")
3238 (match_operand:V2DF 2 "register_operand")]
3241 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3242 ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3243 : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3244 emit_insn (gen (operands[0], operands[1], operands[2]));
3249 (define_insn "trunc<Vwide><mode>2<vczle><vczbe>"
3250 [(set (match_operand:VDF 0 "register_operand" "=w")
3252 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3254 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3255 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3258 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3259 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3261 (match_operand:VDF 1 "register_operand" "0")
3263 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3264 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3265 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3266 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3269 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3270 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3273 (match_operand:<VWIDE> 2 "register_operand" "w"))
3274 (match_operand:VDF 1 "register_operand" "0")))]
3275 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3276 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3277 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3280 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3281 [(match_operand:<VDBL> 0 "register_operand")
3282 (match_operand:VDF 1 "register_operand")
3283 (match_operand:<VWIDE> 2 "register_operand")]
3286 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3287 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3288 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3289 emit_insn (gen (operands[0], operands[1], operands[2]));
3294 (define_expand "vec_pack_trunc_v2df"
3295 [(set (match_operand:V4SF 0 "register_operand")
3297 (float_truncate:V2SF
3298 (match_operand:V2DF 1 "register_operand"))
3299 (float_truncate:V2SF
3300 (match_operand:V2DF 2 "register_operand"))
3304 rtx tmp = gen_reg_rtx (V2SFmode);
3305 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3306 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3308 emit_insn (gen_truncv2dfv2sf2 (tmp, operands[lo]));
3309 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3310 tmp, operands[hi]));
3315 (define_expand "vec_pack_trunc_df"
3316 [(set (match_operand:V2SF 0 "register_operand")
3318 (float_truncate:SF (match_operand:DF 1 "general_operand"))
3319 (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
3322 rtx tmp = gen_reg_rtx (V2SFmode);
3323 emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
3324 emit_insn (gen_truncv2dfv2sf2 (operands[0], tmp));
3330 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
3331 ;; expression like:
3332 ;;      a = (b < c) ? b : c;
3333 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3334 ;; -fno-signed-zeros are enabled, either explicitly or indirectly via
3335 ;; -ffast-math.
3337 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3338 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3339 ;; operand will be returned when both operands are zero (i.e. they may not
3340 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
3341 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3342 ;; NaNs and signed zeroes.
3344 (define_insn "<su><maxmin><mode>3"
3345 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3346 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3347 (match_operand:VHSDF 2 "register_operand" "w")))]
3349 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3350 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3353 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3354 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3355 ;; which implement the IEEE fmax ()/fmin () functions.
3356 (define_insn "<fmaxmin><mode>3<vczle><vczbe>"
3357 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3358 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3359 (match_operand:VHSDF 2 "register_operand" "w")]
3362 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3363 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
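;; Illustration: fmaxnm/fminnm follow the IEEE 754-2008 maxNum/minNum
;; rule that a quiet NaN in one operand selects the other operand,
;; matching C's fmax/fmin:
;;
;;   #include <math.h>
;;   double f (double x) { return fmax (NAN, x); }  /* == x, x not NaN */
;;
;; which is why these insns can implement the fmax<mode>3/fmin<mode>3
;; standard names directly.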
3366 ;; 'across lanes' add.
3368 (define_insn "aarch64_faddp<mode><vczle><vczbe>"
3369 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3370 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3371 (match_operand:VHSDF 2 "register_operand" "w")]
3374 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3375 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3378 (define_insn "reduc_plus_scal_<mode>"
3379 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3380 (unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
3383 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3384 [(set_attr "type" "neon_reduc_add<q>")]
3387 (define_insn "reduc_plus_scal_v2si"
3388 [(set (match_operand:SI 0 "register_operand" "=w")
3389 (unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
3392 "addp\\t%0.2s, %1.2s, %1.2s"
3393 [(set_attr "type" "neon_reduc_add")]
3396 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3397 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3398 [(set (match_operand:GPI 0 "register_operand" "=w")
3400 (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3403 "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3404 [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
3407 (define_insn "reduc_plus_scal_<mode>"
3408 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3409 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3412 "faddp\\t%<Vetype>0, %1.<Vtype>"
3413 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
3416 (define_expand "reduc_plus_scal_v4sf"
3417 [(set (match_operand:SF 0 "register_operand")
3418 (unspec:SF [(match_operand:V4SF 1 "register_operand")]
3422 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3423 rtx scratch = gen_reg_rtx (V4SFmode);
3424 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3425 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3426 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
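;; Illustration: for { a, b, c, d } the two FADDP steps above compute
;;   step 1: { a+b, c+d, a+b, c+d }
;;   step 2: { (a+b)+(c+d), ... }
;; and the scalar result is read from lane 0; as plain C:
;;
;;   static float
;;   reduc_v4sf (const float v[4])
;;   {
;;     return (v[0] + v[1]) + (v[2] + v[3]);
;;   }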
3430 ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
3431 ;; sign or zero-extends its elements.
3432 (define_insn "aarch64_<su>addlv<mode>"
3433 [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3435 [(ANY_EXTEND:<V2XWIDE>
3436 (match_operand:VDQV_L 1 "register_operand" "w"))]
3439 "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3440 [(set_attr "type" "neon_reduc_add<q>")]
3443 ;; An ADDV over a vector PLUS of elements extracted and widened all from the
3444 ;; same vector is the same as an [SU]ADDLV above, so long as all the elements
3445 ;; of that vector are used. We can greatly simplify the RTL expression using
3446 ;; this equivalence.
3447 (define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
3448 [(set (match_operand:<VWIDE_S> 0 "register_operand")
3452 (ANY_EXTEND:<V2XWIDE>
3453 (match_operand:VDQV_L 1 "register_operand"))
3454 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3455 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3456 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half")))]
3458 "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3463 [(ANY_EXTEND:<V2XWIDE>
3469 ;; Similar to the above but for two-step zero-widening reductions.
3470 ;; We can push the outer zero_extend outside the ADDV unspec and make
3471 ;; use of the implicit high-part zeroing semantics of UADDLV to do it all
3472 ;; in a single instruction.
3473 (define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
3474 [(set (match_operand:<VWIDE2X_S> 0 "register_operand" "=w")
3476 [(zero_extend:<VQUADW>
3479 (zero_extend:<V2XWIDE>
3480 (match_operand:VDQQH 1 "register_operand" "w"))
3481 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3482 (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup 1))
3483 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half"))))]
3485 "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3489 (zero_extend:<VWIDE2X_S>
3491 [(zero_extend:<V2XWIDE>
3497 ;; Zero-extending version of the above. As these intrinsics produce a scalar
3498 ;; value that may be used by further intrinsics, we want to avoid moving the
3499 ;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.
3501 (define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
3502 [(set (match_operand:GPI 0 "register_operand" "=w")
3505 [(ANY_EXTEND:<VDQV_L:V2XWIDE>
3506 (match_operand:VDQV_L 1 "register_operand" "w"))]
3509 && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
3510 "<su>addl<VDQV_L:vp>\\t%<VDQV_L:Vwstype>0<VDQV_L:Vwsuf>, %1.<VDQV_L:Vtype>"
3511 [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
3514 (define_expand "@aarch64_<su>addlp<mode>"
3515 [(set (match_operand:<VDBLW> 0 "register_operand")
3518 (ANY_EXTEND:<V2XWIDE>
3519 (match_operand:VDQV_L 1 "register_operand"))
3521 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3525 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
3526 operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
3527 operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
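;; Illustration: [SU]ADDLP adds adjacent pairs of elements, widening
;; each sum; e.g. for unsigned V8QI -> V4HI, a rough scalar model
;; matching the even/odd vec_selects above (name is ours):
;;
;;   #include <stdint.h>
;;   static void
;;   uaddlp_v8qi (uint16_t r[4], const uint8_t a[8])
;;   {
;;     for (int i = 0; i < 4; i++)
;;       r[i] = (uint16_t) (a[2 * i] + a[2 * i + 1]);
;;   }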
3531 (define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
3532 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3535 (ANY_EXTEND:<V2XWIDE>
3536 (match_operand:VDQV_L 1 "register_operand" "w"))
3537 (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half"))
3538 (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3539 (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))]
3541 && !rtx_equal_p (operands[2], operands[3])"
3542 "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3543 [(set_attr "type" "neon_reduc_add<q>")]
3546 (define_insn "clrsb<mode>2<vczle><vczbe>"
3547 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3548 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3550 "cls\\t%0.<Vtype>, %1.<Vtype>"
3551 [(set_attr "type" "neon_cls<q>")]
3554 (define_insn "clz<mode>2<vczle><vczbe>"
3555 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3556 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3558 "clz\\t%0.<Vtype>, %1.<Vtype>"
3559 [(set_attr "type" "neon_cls<q>")]
3562 (define_insn "popcount<mode>2<vczle><vczbe>"
3563 [(set (match_operand:VB 0 "register_operand" "=w")
3564 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3566 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3567 [(set_attr "type" "neon_cnt<q>")]
3570 (define_expand "popcount<mode>2"
3571 [(set (match_operand:VDQHSD_V1DI 0 "register_operand")
3572 (popcount:VDQHSD_V1DI
3573 (match_operand:VDQHSD_V1DI 1 "register_operand")))]
3578 rtx p = aarch64_ptrue_reg (<VPRED>mode, <bitsize> == 64 ? 8 : 16);
3579 emit_insn (gen_aarch64_pred_popcount<mode> (operands[0],
3585 if (<MODE>mode == V1DImode)
3587 rtx out = gen_reg_rtx (DImode);
3588 emit_insn (gen_popcountdi2 (out, gen_lowpart (DImode, operands[1])));
3589 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, out));
3593 /* Generate a byte popcount. */
3594 machine_mode mode = <bitsize> == 64 ? V8QImode : V16QImode;
3595 machine_mode mode2 = <bitsize> == 64 ? V2SImode : V4SImode;
3596 rtx tmp = gen_reg_rtx (mode);
3597 auto icode = optab_handler (popcount_optab, mode);
3598 emit_insn (GEN_FCN (icode) (tmp, gen_lowpart (mode, operands[1])));
3601 && (<VEL>mode == SImode || <VEL>mode == DImode))
3603 /* For V4SI and V2SI, we can generate a UDOT with a 0 accumulator and a
3604 1 multiplicand. For V2DI, another UADDLP is needed. */
3605 rtx ones = force_reg (mode, CONST1_RTX (mode));
3606 auto icode = convert_optab_handler (udot_prod_optab, mode2, mode);
3607 mode = <bitsize> == 64 ? V2SImode : V4SImode;
3608 rtx dest = mode == <MODE>mode ? operands[0] : gen_reg_rtx (mode);
3609 rtx zeros = force_reg (mode, CONST0_RTX (mode));
3610 emit_insn (GEN_FCN (icode) (dest, tmp, ones, zeros));
3614 /* Use a sequence of UADDLPs to accumulate the counts. Each step doubles
3615 the element size and halves the number of elements. */
3616 while (mode != <MODE>mode)
3618 auto icode = code_for_aarch64_addlp (ZERO_EXTEND, GET_MODE (tmp));
3619 mode = insn_data[icode].operand[0].mode;
3620 rtx dest = mode == <MODE>mode ? operands[0] : gen_reg_rtx (mode);
3621 emit_insn (GEN_FCN (icode) (dest, tmp));
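;; As an illustrative sketch (not part of the pattern; registers and exact
;; sequence depend on the enabled features), popcountv4si2 is expected to
;; expand along the lines of:
;;
;;   cnt    v0.16b, v1.16b   // byte popcount
;;   uaddlp v0.8h, v0.16b    // pairwise widen: 16 bytes -> 8 halfwords
;;   uaddlp v0.4s, v0.8h     // pairwise widen: 8 halfwords -> 4 words
;;
;; or, when TARGET_DOTPROD is available, a single UDOT of the byte counts
;; against an all-ones vector with a zero accumulator:
;;
;;   movi   v1.16b, 0x1
;;   movi   v2.4s, 0
;;   cnt    v0.16b, v0.16b
;;   udot   v2.4s, v0.16b, v1.16b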
3628 ;; 'across lanes' max and min ops.
3630 ;; Template for outputting a scalar, so we can create __builtins which can be
3631 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
3632 (define_expand "reduc_<optab>_scal_<mode>"
3633 [(match_operand:<VEL> 0 "register_operand")
3634 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3638 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3639 rtx scratch = gen_reg_rtx (<MODE>mode);
3640 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3642 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3647 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3648 [(match_operand:<VEL> 0 "register_operand")
3649 (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3653 emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3658 ;; Likewise for integer cases, signed and unsigned.
3659 (define_expand "reduc_<optab>_scal_<mode>"
3660 [(match_operand:<VEL> 0 "register_operand")
3661 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3665 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3666 rtx scratch = gen_reg_rtx (<MODE>mode);
3667 emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3669 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3674 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3675 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3676 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3679 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3680 [(set_attr "type" "neon_reduc_minmax<q>")]
3683 (define_insn "aarch64_reduc_<optab>_internalv2si"
3684 [(set (match_operand:V2SI 0 "register_operand" "=w")
3685 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3688 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3689 [(set_attr "type" "neon_reduc_minmax")]
3692 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3693 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3694 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3697 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3698 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3701 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3702 ;; allocation.
3703 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3704 ;; to select.
3706 ;; Thus our BSL is of the form:
3707 ;; op0 = bsl (mask, op2, op3)
3708 ;; We can use any of:
3710 ;; if (op0 = mask)
3711 ;; bsl mask, op2, op3
3712 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
3713 ;; bit op0, op2, mask
3714 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
3715 ;; bif op0, op3, mask
3717 ;; This pattern is what the aarch64_simd_bsl<mode> expander expands to.
3718 ;; Some forms of straight-line code may generate the equivalent form
3719 ;; in *aarch64_simd_bsl<mode>_alt.
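;; For reference, a C sketch of the selection semantics (illustrative only,
;; assuming 64-bit data):
;;
;;   uint64_t bsl (uint64_t mask, uint64_t a, uint64_t b)
;;   {
;;     return (mask & a) | (~mask & b);
;;   }
;;
;; which is the ((a ^ b) & mask) ^ b form that the xor/and/xor RTL below
;; matches.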
3721 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3722 [(set (match_operand:VDQ_I 0 "register_operand")
3726 (match_operand:<V_INT_EQUIV> 3 "register_operand")
3727 (match_operand:VDQ_I 2 "register_operand"))
3728 (match_operand:VDQ_I 1 "register_operand"))
3729 (match_dup:<V_INT_EQUIV> 3)
3732 {@ [ cons: =0 , 1 , 2 , 3 ]
3733 [ w , 0 , w , w ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3734 [ w , w , w , 0 ] bit\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3735 [ w , w , 0 , w ] bif\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3737 [(set_attr "type" "neon_bsl<q>")]
3740 ;; We need this form in addition to the above pattern to match the case
3741 ;; when combine tries merging three insns such that the second operand of
3742 ;; the outer XOR matches the second operand of the inner XOR rather than
3743 ;; the first. The two are equivalent but since recog doesn't try all
3744 ;; permutations of commutative operations, we have to have a separate pattern.
3746 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3747 [(set (match_operand:VDQ_I 0 "register_operand")
3751 (match_operand:VDQ_I 3 "register_operand")
3752 (match_operand:<V_INT_EQUIV> 2 "register_operand"))
3753 (match_operand:VDQ_I 1 "register_operand"))
3754 (match_dup:<V_INT_EQUIV> 2)))]
3756 {@ [ cons: =0 , 1 , 2 , 3 ]
3757 [ w , 0 , w , w ] bsl\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3758 [ w , w , 0 , w ] bit\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3759 [ w , w , w , 0 ] bif\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3761 [(set_attr "type" "neon_bsl<q>")]
3764 ;; DImode is special: we want to avoid computing in the vector registers
3765 ;; operations that are more naturally computed in general purpose
3766 ;; registers. If we did that, we would need to move all three operands from
3767 ;; general purpose registers to vector registers, then back again. However,
3768 ;; we don't want to make this pattern an UNSPEC as we'd lose scope for
3769 ;; optimizations based on the component operations of a BSL.
3771 ;; That means we need a splitter back to the individual operations, if they
3772 ;; would be better calculated on the integer side.
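;; For example, with all operands in general purpose registers the splitter
;; below computes op0 = ((op2 ^ op3) & op1) ^ op3 as (register allocation
;; illustrative):
;;
;;   eor x4, x2, x3
;;   and x4, x4, x1
;;   eor x0, x4, x3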
3774 (define_insn_and_split "aarch64_simd_bsldi_internal"
3775 [(set (match_operand:DI 0 "register_operand")
3779 (match_operand:DI 3 "register_operand")
3780 (match_operand:DI 2 "register_operand"))
3781 (match_operand:DI 1 "register_operand"))
3785 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3786 [ w , 0 , w , w ; neon_bsl , 4 ] bsl\t%0.8b, %2.8b, %3.8b
3787 [ w , w , w , 0 ; neon_bsl , 4 ] bit\t%0.8b, %2.8b, %1.8b
3788 [ w , w , 0 , w ; neon_bsl , 4 ] bif\t%0.8b, %3.8b, %1.8b
3789 [ &r , r , r , r ; multiple , 12 ] #
3791 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3792 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3794 /* Split back to individual operations. If we're before reload, and
3795 able to create a temporary register, do so. If we're after reload,
3796 we've got an early-clobber destination register, so use that.
3797 Otherwise, we can't create pseudos and we can't yet guarantee that
3798 operands[0] is safe to write, so FAIL to split. */
3801 if (reload_completed)
3802 scratch = operands[0];
3803 else if (can_create_pseudo_p ())
3804 scratch = gen_reg_rtx (DImode);
3808 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3809 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3810 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3815 (define_insn_and_split "aarch64_simd_bsldi_alt"
3816 [(set (match_operand:DI 0 "register_operand")
3820 (match_operand:DI 3 "register_operand")
3821 (match_operand:DI 2 "register_operand"))
3822 (match_operand:DI 1 "register_operand"))
3826 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3827 [ w , 0 , w , w ; neon_bsl , 4 ] bsl\t%0.8b, %3.8b, %2.8b
3828 [ w , w , 0 , w ; neon_bsl , 4 ] bit\t%0.8b, %3.8b, %1.8b
3829 [ w , w , w , 0 ; neon_bsl , 4 ] bif\t%0.8b, %2.8b, %1.8b
3830 [ &r , r , r , r ; multiple , 12 ] #
3832 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3833 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3835 /* Split back to individual operations. If we're before reload, and
3836 able to create a temporary register, do so. If we're after reload,
3837 we've got an early-clobber destination register, so use that.
3838 Otherwise, we can't create pseudos and we can't yet guarantee that
3839 operands[0] is safe to write, so FAIL to split. */
3842 if (reload_completed)
3843 scratch = operands[0];
3844 else if (can_create_pseudo_p ())
3845 scratch = gen_reg_rtx (DImode);
3849 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3850 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3851 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3856 (define_expand "@aarch64_simd_bsl<mode>"
3857 [(match_operand:VALLDIF 0 "register_operand")
3858 (match_operand:<V_INT_EQUIV> 1 "register_operand")
3859 (match_operand:VALLDIF 2 "register_operand")
3860 (match_operand:VALLDIF 3 "register_operand")]
3863 /* We can't alias operands together if they have different modes. */
3864 rtx tmp = operands[0];
3865 if (FLOAT_MODE_P (<MODE>mode))
3867 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3868 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3869 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3871 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3872 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3876 if (tmp != operands[0])
3877 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3882 (define_expand "vcond_mask_<mode><v_int_equiv>"
3883 [(match_operand:VALLDI 0 "register_operand")
3884 (match_operand:VALLDI 1 "nonmemory_operand")
3885 (match_operand:VALLDI 2 "nonmemory_operand")
3886 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3889 /* If we have (a = (P) ? -1 : 0), then we can simply move
3890 the generated mask (the result must be an int). */
3891 if (operands[1] == CONSTM1_RTX (<MODE>mode)
3892 && operands[2] == CONST0_RTX (<MODE>mode))
3893 emit_move_insn (operands[0], operands[3]);
3894 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
3895 else if (operands[1] == CONST0_RTX (<MODE>mode)
3896 && operands[2] == CONSTM1_RTX (<MODE>mode))
3897 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3900 if (!REG_P (operands[1]))
3901 operands[1] = force_reg (<MODE>mode, operands[1]);
3902 if (!REG_P (operands[2]))
3903 operands[2] = force_reg (<MODE>mode, operands[2]);
3904 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3905 operands[1], operands[2]));
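;; In other words, the expansion implements a per-element select; as a C
;; sketch (illustrative):
;;
;;   for (i = 0; i < nelts; i++)
;;     op0[i] = op3[i] ? op1[i] : op2[i];
;;
;; where each mask element of op3 is known to be all-ones or all-zeros.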
3911 ;; Patterns that compare two vectors and branch conditionally on the result.
3913 (define_expand "cbranch<mode>4"
3916 (match_operator 0 "aarch64_equality_operator"
3917 [(match_operand:VDQ_I 1 "register_operand")
3918 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")])
3919 (label_ref (match_operand 3 ""))
3923 auto code = GET_CODE (operands[0]);
3924 rtx tmp = operands[1];
3926 /* If comparing against a non-zero vector, XOR the operands first so that
3927 the branch reduces to a != 0 comparison on the result. */
3928 if (operands[2] != CONST0_RTX (<MODE>mode))
3930 tmp = gen_reg_rtx (<MODE>mode);
3931 emit_insn (gen_xor<mode>3 (tmp, operands[1], operands[2]));
3934 /* For 64-bit vectors we need no reductions. */
3935 if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
3937 /* Always reduce using a V4SI. */
3938 rtx reduc = gen_lowpart (V4SImode, tmp);
3939 rtx res = gen_reg_rtx (V4SImode);
3940 emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
3941 emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
3944 rtx val = gen_reg_rtx (DImode);
3945 emit_move_insn (val, gen_lowpart (DImode, tmp));
3947 rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
3948 rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
3949 emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
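;; As an illustrative sketch, a 128-bit "branch if v0 != v1" is expected to
;; expand to something like:
;;
;;   eor   v2.16b, v0.16b, v1.16b   // nonzero iff v0 != v1
;;   umaxp v2.4s, v2.4s, v2.4s      // reduce 128 bits to 64
;;   fmov  x0, d2
;;   cbnz  x0, .Ltarget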
3953 ;; Patterns comparing two vectors to produce a mask.
3955 (define_expand "vec_cmp<mode><mode>"
3956 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3957 (match_operator 1 "comparison_operator"
3958 [(match_operand:VSDQ_I_DI 2 "register_operand")
3959 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3962 rtx mask = operands[0];
3963 enum rtx_code code = GET_CODE (operands[1]);
3973 if (operands[3] == CONST0_RTX (<MODE>mode))
3978 if (!REG_P (operands[3]))
3979 operands[3] = force_reg (<MODE>mode, operands[3]);
3987 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3991 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3995 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3999 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
4003 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
4007 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
4011 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
4015 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
4019 /* Handle NE as !EQ. */
4020 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
4021 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
4025 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
4035 (define_expand "vec_cmp<mode><v_int_equiv>"
4036 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
4037 (match_operator 1 "comparison_operator"
4038 [(match_operand:VDQF 2 "register_operand")
4039 (match_operand:VDQF 3 "nonmemory_operand")]))]
4042 int use_zero_form = 0;
4043 enum rtx_code code = GET_CODE (operands[1]);
4044 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
4046 rtx (*comparison) (rtx, rtx, rtx) = NULL;
4055 if (operands[3] == CONST0_RTX (<MODE>mode))
4062 if (!REG_P (operands[3]))
4063 operands[3] = force_reg (<MODE>mode, operands[3]);
4073 comparison = gen_aarch64_cmlt<mode>;
4078 std::swap (operands[2], operands[3]);
4082 comparison = gen_aarch64_cmgt<mode>;
4087 comparison = gen_aarch64_cmle<mode>;
4092 std::swap (operands[2], operands[3]);
4096 comparison = gen_aarch64_cmge<mode>;
4100 comparison = gen_aarch64_cmeq<mode>;
4118 /* All of the above must not raise any FP exceptions. Thus we first
4119 check each operand for NaNs and force any elements containing NaN to
4120 zero before using them in the compare.
4121 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4122 (cm<cc> (isnan (a) ? 0.0 : a,
4123 isnan (b) ? 0.0 : b))
4124 We use the following transformations for doing the comparisons:
4125 a UNGE b -> a GE b
4126 a UNGT b -> a GT b
4127 a UNLE b -> b GE a
4128 a UNLT b -> b GT a. */
4130 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4131 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4132 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4133 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4134 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4135 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4136 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4137 lowpart_subreg (<V_INT_EQUIV>mode,
4140 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4141 lowpart_subreg (<V_INT_EQUIV>mode,
4144 gcc_assert (comparison != NULL);
4145 emit_insn (comparison (operands[0],
4146 lowpart_subreg (<MODE>mode,
4147 tmp0, <V_INT_EQUIV>mode),
4148 lowpart_subreg (<MODE>mode,
4149 tmp1, <V_INT_EQUIV>mode)));
4150 emit_insn (gen_iorn<v_int_equiv>3 (operands[0], operands[0], tmp2));
4160 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
4161 As a LT b <=> b GT a && a LE b <=> b GE a, our transformations are:
4162 a GE b -> a GE b
4163 a GT b -> a GT b
4164 a LE b -> b GE a
4165 a LT b -> b GT a
4166 a EQ b -> a EQ b
4167 a NE b -> ~(a EQ b) */
4168 gcc_assert (comparison != NULL);
4169 emit_insn (comparison (operands[0], operands[2], operands[3]));
4171 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4175 /* LTGT is not guaranteed not to generate a FP exception, so take the
4176 faster route: ((a > b) || (b > a)). */
4177 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4178 operands[2], operands[3]));
4179 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4180 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4186 /* cmeq (a, a) & cmeq (b, b). */
4187 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4188 operands[2], operands[2]));
4189 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4190 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4192 if (code == UNORDERED)
4193 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4194 else if (code == UNEQ)
4196 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4197 emit_insn (gen_iorn<v_int_equiv>3 (operands[0], tmp, operands[0]));
4208 (define_expand "vec_cmpu<mode><mode>"
4209 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4210 (match_operator 1 "comparison_operator"
4211 [(match_operand:VSDQ_I_DI 2 "register_operand")
4212 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4215 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4216 operands[2], operands[3]));
4220 ;; Patterns for AArch64 SIMD Intrinsics.
4222 ;; Lane extraction with sign extension to general purpose register.
4223 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4224 [(set (match_operand:GPI 0 "register_operand" "=r")
4226 (vec_select:<VDQQH:VEL>
4227 (match_operand:VDQQH 1 "register_operand" "w")
4228 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4231 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4232 INTVAL (operands[2]));
4233 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4235 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4238 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4239 [(set (match_operand:GPI 0 "register_operand" "=r")
4241 (vec_select:<VDQQH:VEL>
4242 (match_operand:VDQQH 1 "register_operand" "w")
4243 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4246 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4247 INTVAL (operands[2]));
4248 return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4250 [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4253 ;; Lane extraction of a value, where neither sign nor zero extension
4254 ;; is guaranteed, so the upper bits should be considered undefined.
4255 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4256 ;; Extracting lane zero is split into a simple move when it is between SIMD
4257 ;; registers or a store.
4258 (define_insn_and_split "@aarch64_get_lane<mode>"
4259 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4261 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4262 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4265 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4266 switch (which_alternative)
4269 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4271 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4273 return "st1\\t{%1.<Vetype>}[%2], %0";
4278 "&& reload_completed
4279 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4280 [(set (match_dup 0) (match_dup 1))]
4282 operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4284 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4287 (define_insn "*aarch64_get_high<mode>"
4288 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
4290 (match_operand:VQ_2E 1 "register_operand" "w")
4291 (parallel [(match_operand:SI 2 "immediate_operand")])))]
4292 "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
4294 [(set_attr "type" "f_mrc")]
4297 (define_insn "load_pair_lanes<mode>"
4298 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4300 (match_operand:VDCSIF 1 "memory_operand" "Utq")
4301 (match_operand:VDCSIF 2 "memory_operand" "m")))]
4303 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
4304 "ldr\\t%<single_dtype>0, %1"
4305 [(set_attr "type" "neon_load1_1reg<dblq>")]
4308 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
4309 ;; below. The reason for having both of them is that the alternatives of
4310 ;; the later patterns do not have consistent register preferences: the STP
4311 ;; alternatives have no preference between GPRs and FPRs (and if anything,
4312 ;; the GPR form is more natural for scalar integers) whereas the other
4313 ;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4315 ;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4316 ;; which the destination was always memory. On the other hand, expressing
4317 ;; the true preferences makes GPRs seem more palatable than they really are
4318 ;; for register destinations.
4320 ;; Despite that, we do still want the general form to have STP alternatives,
4321 ;; in order to handle cases where a register destination is spilled.
4323 ;; The best compromise therefore seemed to be to have a dedicated STP
4324 ;; pattern to catch cases in which the destination was always memory.
4325 ;; This dedicated pattern must come first.
4327 (define_insn "store_pair_lanes<mode>"
4328 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand")
4330 (match_operand:VDCSIF 1 "register_operand")
4331 (match_operand:VDCSIF 2 "register_operand")))]
4333 {@ [ cons: =0 , 1 , 2 ; attrs: type ]
4334 [ Umn , w , w ; neon_stp ] stp\t%<single_type>1, %<single_type>2, %y0
4335 [ Umn , r , r ; store_16 ] stp\t%<single_wx>1, %<single_wx>2, %y0
4339 ;; Form a vector whose least significant half comes from operand 1 and whose
4340 ;; most significant half comes from operand 2. The register alternatives
4341 ;; tie the least significant half to the same register as the destination,
4342 ;; so that only the other half needs to be handled explicitly. For the
4343 ;; reasons given above, the STP alternatives use ? for constraints that
4344 ;; the register alternatives either don't accept or themselves disparage.
4346 (define_insn "*aarch64_combine_internal<mode>"
4347 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4349 (match_operand:VDCSIF 1 "register_operand")
4350 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")))]
4352 && !BYTES_BIG_ENDIAN
4353 && (register_operand (operands[0], <VDBL>mode)
4354 || register_operand (operands[2], <MODE>mode))"
4355 {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
4356 [ w , w , w ; neon_permute<dblq> , simd ] uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
4357 [ w , 0 , ?r ; neon_from_gp<dblq> , simd ] ins\t%0.<single_type>[1], %<single_wx>2
4358 [ w , 0 , ?r ; f_mcr , * ] fmov\t%0.d[1], %2
4359 [ w , 0 , Utv ; neon_load1_one_lane<dblq> , simd ] ld1\t{%0.<single_type>}[1], %2
4360 [ Umn , ?w , w ; neon_stp , * ] stp\t%<single_type>1, %<single_type>2, %y0
4361 [ Umn , ?r , ?r ; store_16 , * ] stp\t%<single_wx>1, %<single_wx>2, %y0
4365 (define_insn "*aarch64_combine_internal_be<mode>"
4366 [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4368 (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")
4369 (match_operand:VDCSIF 1 "register_operand")))]
4372 && (register_operand (operands[0], <VDBL>mode)
4373 || register_operand (operands[2], <MODE>mode))"
4374 {@ [ cons: =0 , 1 , 2 ; attrs: type , arch ]
4375 [ w , w , w ; neon_permute<dblq> , simd ] uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
4376 [ w , 0 , ?r ; neon_from_gp<dblq> , simd ] ins\t%0.<single_type>[1], %<single_wx>2
4377 [ w , 0 , ?r ; f_mcr , * ] fmov\t%0.d[1], %2
4378 [ w , 0 , Utv ; neon_load1_one_lane<dblq> , simd ] ld1\t{%0.<single_type>}[1], %2
4379 [ Umn , ?w , ?w ; neon_stp , * ] stp\t%<single_type>2, %<single_type>1, %y0
4380 [ Umn , ?r , ?r ; store_16 , * ] stp\t%<single_wx>2, %<single_wx>1, %y0
4384 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4385 ;; dest vector.
4387 (define_insn "*aarch64_combinez<mode>"
4388 [(set (match_operand:<VDBL> 0 "register_operand")
4390 (match_operand:VDCSIF 1 "nonimmediate_operand")
4391 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
4392 "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4393 {@ [ cons: =0 , 1 ; attrs: type ]
4394 [ w , w ; neon_move<q> ] fmov\t%<single_type>0, %<single_type>1
4395 [ w , ?r ; neon_from_gp ] fmov\t%<single_type>0, %<single_wx>1
4396 [ w , m ; neon_load1_1reg ] ldr\t%<single_type>0, %1
4400 (define_insn "*aarch64_combinez_be<mode>"
4401 [(set (match_operand:<VDBL> 0 "register_operand")
4403 (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4404 (match_operand:VDCSIF 1 "nonimmediate_operand")))]
4405 "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4406 {@ [ cons: =0 , 1 ; attrs: type ]
4407 [ w , w ; neon_move<q> ] fmov\t%<single_type>0, %<single_type>1
4408 [ w , ?r ; neon_from_gp ] fmov\t%<single_type>0, %<single_wx>1
4409 [ w , m ; neon_load1_1reg ] ldr\t%<single_type>0, %1
4413 ;; Form a vector whose first half (in array order) comes from operand 1
4414 ;; and whose second half (in array order) comes from operand 2.
4415 ;; This operand order follows the RTL vec_concat operation.
4416 (define_expand "@aarch64_vec_concat<mode>"
4417 [(set (match_operand:<VDBL> 0 "register_operand")
4419 (match_operand:VDCSIF 1 "general_operand")
4420 (match_operand:VDCSIF 2 "general_operand")))]
4423 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4424 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4426 if (MEM_P (operands[1])
4427 && MEM_P (operands[2])
4428 && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4429 /* Use load_pair_lanes<mode>. */
4431 else if (operands[hi] == CONST0_RTX (<MODE>mode))
4433 /* Use *aarch64_combinez<mode>. */
4434 if (!nonimmediate_operand (operands[lo], <MODE>mode))
4435 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4439 /* Use *aarch64_combine_internal<mode>. */
4440 operands[lo] = force_reg (<MODE>mode, operands[lo]);
4441 if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4443 if (MEM_P (operands[hi]))
4445 rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4446 operands[hi] = replace_equiv_address (operands[hi], addr);
4449 operands[hi] = force_reg (<MODE>mode, operands[hi]);
4454 ;; Form a vector whose least significant half comes from operand 1 and whose
4455 ;; most significant half comes from operand 2. This operand order follows
4456 ;; arm_neon.h vcombine* intrinsics.
4457 (define_expand "@aarch64_combine<mode>"
4458 [(match_operand:<VDBL> 0 "register_operand")
4459 (match_operand:VDC 1 "general_operand")
4460 (match_operand:VDC 2 "general_operand")]
4463 if (BYTES_BIG_ENDIAN)
4464 std::swap (operands[1], operands[2]);
4465 emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
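;; For example, the arm_neon.h intrinsic
;;   int8x16_t vcombine_s8 (int8x8_t lo, int8x8_t hi);
;; maps onto this expander; the swap above keeps the vec_concat operand
;; order correct for big-endian.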
4471 ;; <su><addsub>l<q>.
4473 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4474 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4475 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4476 (match_operand:VQW 1 "register_operand" "w")
4477 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4478 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4479 (match_operand:VQW 2 "register_operand" "w")
4482 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4483 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4486 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4487 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4488 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4489 (match_operand:VQW 1 "register_operand" "w")
4490 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4491 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4492 (match_operand:VQW 2 "register_operand" "w")
4495 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4496 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4499 (define_expand "vec_widen_<su>add_lo_<mode>"
4500 [(match_operand:<VWIDE> 0 "register_operand")
4501 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4502 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4505 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4506 emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4511 (define_expand "vec_widen_<su>add_hi_<mode>"
4512 [(match_operand:<VWIDE> 0 "register_operand")
4513 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4514 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4517 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4518 emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4523 (define_expand "vec_widen_<su>sub_lo_<mode>"
4524 [(match_operand:<VWIDE> 0 "register_operand")
4525 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4526 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4529 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4530 emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4535 (define_expand "vec_widen_<su>sub_hi_<mode>"
4536 [(match_operand:<VWIDE> 0 "register_operand")
4537 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4538 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4541 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4542 emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4547 (define_expand "aarch64_saddl2<mode>"
4548 [(match_operand:<VWIDE> 0 "register_operand")
4549 (match_operand:VQW 1 "register_operand")
4550 (match_operand:VQW 2 "register_operand")]
4553 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4554 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4559 (define_expand "aarch64_uaddl2<mode>"
4560 [(match_operand:<VWIDE> 0 "register_operand")
4561 (match_operand:VQW 1 "register_operand")
4562 (match_operand:VQW 2 "register_operand")]
4565 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4566 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4571 (define_expand "aarch64_ssubl2<mode>"
4572 [(match_operand:<VWIDE> 0 "register_operand")
4573 (match_operand:VQW 1 "register_operand")
4574 (match_operand:VQW 2 "register_operand")]
4577 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4578 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4583 (define_expand "aarch64_usubl2<mode>"
4584 [(match_operand:<VWIDE> 0 "register_operand")
4585 (match_operand:VQW 1 "register_operand")
4586 (match_operand:VQW 2 "register_operand")]
4589 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4590 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4595 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4596 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4597 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4598 (match_operand:VD_BHSI 1 "register_operand" "w"))
4600 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4602 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4603 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
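;; For example (illustrative), the vectorizer's vec_widen_sadd lo/hi pair
;; for V16QI inputs becomes:
;;
;;   saddl  v0.8h, v1.8b, v2.8b     // low halves, sign-extended
;;   saddl2 v3.8h, v1.16b, v2.16b   // high halves of the 128-bit inputs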
4606 ;; <su><addsub>w<q>.
4608 (define_expand "widen_ssum<mode>3"
4609 [(set (match_operand:<VDBLW> 0 "register_operand")
4610 (plus:<VDBLW> (sign_extend:<VDBLW>
4611 (match_operand:VQW 1 "register_operand"))
4612 (match_operand:<VDBLW> 2 "register_operand")))]
4615 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4616 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4618 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4620 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4625 (define_expand "widen_ssum<mode>3"
4626 [(set (match_operand:<VWIDE> 0 "register_operand")
4627 (plus:<VWIDE> (sign_extend:<VWIDE>
4628 (match_operand:VD_BHSI 1 "register_operand"))
4629 (match_operand:<VWIDE> 2 "register_operand")))]
4632 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4636 (define_expand "widen_usum<mode>3"
4637 [(set (match_operand:<VDBLW> 0 "register_operand")
4638 (plus:<VDBLW> (zero_extend:<VDBLW>
4639 (match_operand:VQW 1 "register_operand"))
4640 (match_operand:<VDBLW> 2 "register_operand")))]
4643 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4644 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4646 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4648 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4653 (define_expand "widen_usum<mode>3"
4654 [(set (match_operand:<VWIDE> 0 "register_operand")
4655 (plus:<VWIDE> (zero_extend:<VWIDE>
4656 (match_operand:VD_BHSI 1 "register_operand"))
4657 (match_operand:<VWIDE> 2 "register_operand")))]
4660 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4664 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4665 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4666 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4668 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4670 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4671 [(set_attr "type" "neon_sub_widen")]
4674 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4675 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4676 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4679 (match_operand:VQW 2 "register_operand" "w")
4680 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4682 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4683 [(set_attr "type" "neon_sub_widen")]
4686 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4687 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4688 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4691 (match_operand:VQW 2 "register_operand" "w")
4692 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4694 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4695 [(set_attr "type" "neon_sub_widen")]
4698 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4699 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4701 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4702 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4704 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4705 [(set_attr "type" "neon_add_widen")]
4708 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4709 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4713 (match_operand:VQW 2 "register_operand" "w")
4714 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4715 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4717 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4718 [(set_attr "type" "neon_add_widen")]
4721 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4722 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4726 (match_operand:VQW 2 "register_operand" "w")
4727 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4728 (match_operand:<VWIDE> 1 "register_operand" "w")))]
4730 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4731 [(set_attr "type" "neon_add_widen")]
4734 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4735 [(set (match_operand:<VWIDE> 0 "register_operand")
4739 (match_operand:VQW 2 "register_operand")
4741 (match_operand:<VWIDE> 1 "register_operand")))]
4744 /* We still do an emit_insn rather than relying on the pattern above
4745 because for the MINUS case the operands would need to be swapped
4746 around. */
4747 rtx p
4748 = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4749 emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4757 ;; <su><r>h<addsub>.
4759 (define_expand "<su_optab>avg<mode>3_floor"
4760 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4764 (ANY_EXTEND:<V2XWIDE>
4765 (match_operand:VDQ_BHSI 1 "register_operand"))
4766 (ANY_EXTEND:<V2XWIDE>
4767 (match_operand:VDQ_BHSI 2 "register_operand")))
4771 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4775 (define_expand "<su_optab>avg<mode>3_ceil"
4776 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4781 (ANY_EXTEND:<V2XWIDE>
4782 (match_operand:VDQ_BHSI 1 "register_operand"))
4783 (ANY_EXTEND:<V2XWIDE>
4784 (match_operand:VDQ_BHSI 2 "register_operand")))
4789 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4793 (define_expand "aarch64_<su>hsub<mode>"
4794 [(set (match_operand:VDQ_BHSI 0 "register_operand")
4798 (ANY_EXTEND:<V2XWIDE>
4799 (match_operand:VDQ_BHSI 1 "register_operand"))
4800 (ANY_EXTEND:<V2XWIDE>
4801 (match_operand:VDQ_BHSI 2 "register_operand")))
4805 operands[3] = CONST1_RTX (<V2XWIDE>mode);
4809 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4810 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4814 (ANY_EXTEND:<V2XWIDE>
4815 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4816 (ANY_EXTEND:<V2XWIDE>
4817 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4818 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))))]
4820 "<su>h<ADDSUB:optab>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4821 [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
4824 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4825 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4830 (ANY_EXTEND:<V2XWIDE>
4831 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4832 (ANY_EXTEND:<V2XWIDE>
4833 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4834 (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))
4837 "<su>rhadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4838 [(set_attr "type" "neon_add_halve<q>")]
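;; Per element, and without intermediate overflow, the patterns above
;; compute (a C sketch, illustrative):
;;
;;   hadd (a, b)  = (a + b) >> 1
;;   rhadd (a, b) = (a + b + 1) >> 1
;;
;; with the addition done in twice-as-wide arithmetic, exactly as the
;; extend/plus/shift RTL spells out.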
4841 ;; <r><addsub>hn<q>.
4843 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4844 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4845 (truncate:<VNARROWQ>
4847 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4848 (match_operand:VQN 2 "register_operand" "w"))
4849 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4851 "<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4852 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4855 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4856 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4857 (truncate:<VNARROWQ>
4860 (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4861 (match_operand:VQN 2 "register_operand" "w"))
4862 (match_operand:VQN 3 "aarch64_simd_raddsubhn_imm_vec"))
4863 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4865 "r<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4866 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4869 (define_expand "aarch64_<optab>hn<mode>"
4870 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4871 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4872 (match_operand:VQN 2 "register_operand")))]
4876 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4877 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4878 emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[0], operands[1],
4879 operands[2], shft));
4884 (define_expand "aarch64_r<optab>hn<mode>"
4885 [(set (match_operand:<VNARROWQ> 0 "register_operand")
4886 (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4887 (match_operand:VQN 2 "register_operand")))]
4891 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4892 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4894 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4895 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4896 emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[0], operands[1],
4897 operands[2], rnd, shft));
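;; Per element, for 16-bit inputs narrowing to 8 bits, these compute
;; (a C sketch, illustrative):
;;
;;   addhn (a, b)  = (uint8_t) ((a + b) >> 8)
;;   raddhn (a, b) = (uint8_t) ((a + b + 0x80) >> 8)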
4902 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4903 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4904 (vec_concat:<VNARROWQ2>
4905 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4906 (truncate:<VNARROWQ>
4908 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4909 (match_operand:VQN 3 "register_operand" "w"))
4910 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4911 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4912 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4913 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4916 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4917 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4918 (vec_concat:<VNARROWQ2>
4919 (match_operand:<VNARROWQ> 1 "register_operand" "0")
4920 (truncate:<VNARROWQ>
4923 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4924 (match_operand:VQN 3 "register_operand" "w"))
4925 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4926 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4927 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4928 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4929 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4932 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4933 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4934 (vec_concat:<VNARROWQ2>
4935 (truncate:<VNARROWQ>
4937 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4938 (match_operand:VQN 3 "register_operand" "w"))
4939 (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))
4940 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4941 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4942 "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4943 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4946 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
4947 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4948 (vec_concat:<VNARROWQ2>
4949 (truncate:<VNARROWQ>
4952 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4953 (match_operand:VQN 3 "register_operand" "w"))
4954 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4955 (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))
4956 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4957 "TARGET_SIMD && BYTES_BIG_ENDIAN"
4958 "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4959 [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4962 (define_expand "aarch64_<optab>hn2<mode>"
4963 [(match_operand:<VNARROWQ2> 0 "register_operand")
4964 (match_operand:<VNARROWQ> 1 "register_operand")
4965 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
4966 (match_operand:VQN 3 "register_operand"))]
4970 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4971 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4972 if (BYTES_BIG_ENDIAN)
4973 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[0],
4974 operands[1], operands[2], operands[3], shft));
4976 emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[0],
4977 operands[1], operands[2], operands[3], shft));
4982 (define_expand "aarch64_r<optab>hn2<mode>"
4983 [(match_operand:<VNARROWQ2> 0 "register_operand")
4984 (match_operand:<VNARROWQ> 1 "register_operand")
4985 (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
4986 (match_operand:VQN 3 "register_operand"))]
4990 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4991 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4993 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4994 HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4995 if (BYTES_BIG_ENDIAN)
4996 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[0],
4997 operands[1], operands[2], operands[3], rnd, shft));
4999 emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[0],
5000 operands[1], operands[2], operands[3], rnd, shft));
5005 ;; Optimize ((a + b) >> n) + c where n is half the bitsize of the vector
5006 (define_insn_and_split "*bitmask_shift_plus<mode>"
5007 [(set (match_operand:VQN 0 "register_operand" "=&w")
5010 (plus:VQN (match_operand:VQN 1 "register_operand" "w")
5011 (match_operand:VQN 2 "register_operand" "w"))
5012 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5013 (match_operand:VQN 4 "register_operand" "w")))]
5020 if (can_create_pseudo_p ())
5021 tmp = gen_reg_rtx (<VNARROWQ>mode);
5023 tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[0]));
5024 emit_insn (gen_aarch64_addhn<mode> (tmp, operands[1], operands[2]));
5025 emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[0], operands[4], tmp));
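;; i.e. (illustrative, for V4SI operands a, b and addend c):
;;
;;   addhn v3.4h, v0.4s, v1.4s   // (a + b) >> 16, narrowed
;;   uaddw v2.4s, v2.4s, v3.4h   // add c, widening the narrow sum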
5031 (define_insn "aarch64_pmul<mode>"
5032 [(set (match_operand:VB 0 "register_operand" "=w")
5033 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
5034 (match_operand:VB 2 "register_operand" "w")]
5037 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5038 [(set_attr "type" "neon_mul_<Vetype><q>")]
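;; PMUL is a carry-less (polynomial over GF(2)) multiply; per byte it is
;; equivalent to this C sketch (illustrative):
;;
;;   uint8_t pmul (uint8_t a, uint8_t b)
;;   {
;;     uint8_t r = 0;
;;     for (int i = 0; i < 8; i++)
;;       if (b & (1 << i))
;;         r ^= (uint8_t) (a << i);
;;     return r;
;;   }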
5041 (define_insn "aarch64_pmullv8qi"
5042 [(set (match_operand:V8HI 0 "register_operand" "=w")
5043 (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
5044 (match_operand:V8QI 2 "register_operand" "w")]
5047 "pmull\\t%0.8h, %1.8b, %2.8b"
5048 [(set_attr "type" "neon_mul_b_long")]
5051 (define_insn "aarch64_pmull_hiv16qi_insn"
5052 [(set (match_operand:V8HI 0 "register_operand" "=w")
5055 (match_operand:V16QI 1 "register_operand" "w")
5056 (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
5058 (match_operand:V16QI 2 "register_operand" "w")
5062 "pmull2\\t%0.8h, %1.16b, %2.16b"
5063 [(set_attr "type" "neon_mul_b_long")]
5066 (define_expand "aarch64_pmull_hiv16qi"
5067 [(match_operand:V8HI 0 "register_operand")
5068 (match_operand:V16QI 1 "register_operand")
5069 (match_operand:V16QI 2 "register_operand")]
5072 rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
5073 emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
5081 (define_insn "aarch64_fmulx<mode>"
5082 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5084 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5085 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5088 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5089 [(set_attr "type" "neon_fp_mul_<stype>")]
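;; FMULX behaves like FMUL except that 0.0 * infinity (in either order)
;; yields +/-2.0 instead of the default NaN, which is why it is modelled
;; with an UNSPEC rather than as a plain multiply.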
5092 ;; vmulxq_lane_f32 and vmulx_laneq_f32
5094 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5095 [(set (match_operand:VDQSF 0 "register_operand" "=w")
5097 [(match_operand:VDQSF 1 "register_operand" "w")
5098 (vec_duplicate:VDQSF
5100 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
5101 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5105 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
5106 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5108 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5111 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
5113 (define_insn "*aarch64_mulx_elt<mode>"
5114 [(set (match_operand:VDQF 0 "register_operand" "=w")
5116 [(match_operand:VDQF 1 "register_operand" "w")
5119 (match_operand:VDQF 2 "register_operand" "w")
5120 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5124 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5125 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5127 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5132 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5133 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5135 [(match_operand:VHSDF 1 "register_operand" "w")
5136 (vec_duplicate:VHSDF
5137 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5140 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
5141 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5144 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5145 ;; vmulxd_lane_f64 == vmulx_lane_f64
5146 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
5148 (define_insn "*aarch64_vgetfmulx<mode>"
5149 [(set (match_operand:<VEL> 0 "register_operand" "=w")
5151 [(match_operand:<VEL> 1 "register_operand" "w")
5153 (match_operand:VDQF 2 "register_operand" "w")
5154 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5158 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5159 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5161 [(set_attr "type" "fmul<Vetype>")]
5165 (define_insn "<su_optab>s<addsub><mode>3<vczle><vczbe>"
5166 [(set (match_operand:VSDQ_I_QI_HI 0 "register_operand" "=w")
5167 (BINQOPS:VSDQ_I_QI_HI
5168 (match_operand:VSDQ_I_QI_HI 1 "register_operand" "w")
5169 (match_operand:VSDQ_I_QI_HI 2 "register_operand" "w")))]
5171 "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5172 [(set_attr "type" "neon_q<addsub><q>")]
5175 (define_expand "<su_optab>s<addsub><mode>3"
5177 [(set (match_operand:GPI 0 "register_operand")
5178 (SBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5179 (match_operand:GPI 2 "aarch64_plus_operand")))
5180 (clobber (scratch:GPI))
5181 (clobber (reg:CC CC_REGNUM))])]
5184 ;; Introducing a temporary GP reg allows signed saturating arithmetic with GPR
5185 ;; operands to be calculated without the use of costly transfers to and from FP
5186 ;; registers. For example, saturating addition usually uses three FMOVs:
5187 ;;
5188 ;; fmov d0, x0
5189 ;; fmov d1, x1
5190 ;; sqadd d0, d0, d1
5191 ;; fmov x0, d0
5192 ;;
5193 ;; Using a temporary register results in three cheaper instructions being used
5194 ;; in place of the three FMOVs, which calculate the saturating limit accounting
5195 ;; for the signedness of operand2:
5196 ;;
5197 ;; asr x2, x1, 63
5198 ;; adds x0, x0, x1
5199 ;; eor x2, x2, 0x8000000000000000
5200 ;; csinv x0, x0, x2, vc
5202 ;; If operand2 is a constant value, the temporary register can be used to store
5203 ;; the saturating limit directly, without the ASR and EOR needed to calculate it.
5205 (define_insn_and_split "aarch64_<su_optab>s<addsub><mode>3<vczle><vczbe>"
5206 [(set (match_operand:GPI 0 "register_operand")
5207 (SBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5208 (match_operand:GPI 2 "aarch64_plus_operand")))
5209 (clobber (match_scratch:GPI 3))
5210 (clobber (reg:CC CC_REGNUM))]
5212 {@ [ cons: =0, 1 , 2 , =3 ; attrs: type , arch , length ]
5213 [ w , w , w , X ; neon_q<addsub><q> , simd , 4 ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
5214 [ r , r , JIr , &r ; * , * , 8 ] #
5216 "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
5223 if (REG_P (operands[2]))
5225 rtx shift_constant = gen_int_mode (GET_MODE_BITSIZE (<MODE>mode) - 1,
5227 auto limit = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1);
5228 rtx limit_constant = gen_int_mode (limit, <MODE>mode);
5229 emit_insn (gen_ashr<mode>3 (operands[3], operands[2], shift_constant));
5230 emit_insn (gen_xor<mode>3 (operands[3], operands[3], limit_constant));
5235 emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
5239 emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1],
5246 rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM);
5250 operands[4] = gen_rtx_NE (<MODE>mode, ccin, const0_rtx);
5251 operands[5] = gen_rtx_NOT (<MODE>mode, operands[3]);
5252 operands[6] = operands[0];
5255 operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx);
5256 operands[5] = operands[0];
5257 operands[6] = operands[3];
5265 auto imm = INTVAL (operands[2]);
5266 rtx neg_imm = gen_int_mode (-imm, <MODE>mode);
5272 emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5273 operands[2], neg_imm));
5274 limit = imm >= 0 ? wi::min_value (<MODE>mode, SIGNED)
5275 : wi::max_value (<MODE>mode, SIGNED);
5278 emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5279 neg_imm, operands[2]));
5280 limit = imm >= 0 ? wi::max_value (<MODE>mode, SIGNED)
5281 : wi::min_value (<MODE>mode, SIGNED);
5287 rtx sat_limit = immed_wide_int_const (limit, <MODE>mode);
5288 emit_insn (gen_rtx_SET (operands[3], sat_limit));
5290 rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM);
5291 operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx);
5292 operands[5] = operands[0];
5293 operands[6] = operands[3];
5298 ;; Unsigned saturating arithmetic with GPR operands can be optimised similarly
5299 ;; to the signed case, albeit without the need for a temporary register as the
5300 ;; saturating limit can be inferred from the <addsub> code. This applies only
5301 ;; to SImode and DImode.
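;; For example, unsigned saturating addition on DImode GPRs is expected to
;; split to something like (registers illustrative):
;;
;;   adds  x0, x0, x1
;;   csinv x0, x0, xzr, cc
;;
;; i.e. on carry-out the result is replaced by all-ones (~xzr).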
5303 (define_insn_and_split "<su_optab>s<addsub><mode>3<vczle><vczbe>"
5304 [(set (match_operand:GPI 0 "register_operand")
5305 (UBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5306 (match_operand:GPI 2 "aarch64_plus_operand")))
5307 (clobber (reg:CC CC_REGNUM))]
5309 {@ [ cons: =0, 1 , 2 ; attrs: type , arch , length ]
5310 [ w , w , w ; neon_q<addsub><q> , simd , 4 ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
5311 [ r , r , JIr ; * , * , 8 ] #
5313 "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
5321 if (REG_P (operands[2]))
5326 emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
5330 emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1],
5339 auto imm = UINTVAL (operands[2]);
5340 rtx neg_imm = gen_int_mode (-imm, <MODE>mode);
5344 emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5345 operands[2], neg_imm));
5348 emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5349 neg_imm, operands[2]));
5356 rtx ccin = gen_rtx_REG (CCmode, CC_REGNUM);
5360 operands[3] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx);
5361 operands[4] = gen_int_mode (-1, <MODE>mode);
5364 operands[3] = gen_rtx_GEU (<MODE>mode, ccin, const0_rtx);
5365 operands[4] = const0_rtx;
5373 ;; suqadd and usqadd
5375 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5376 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5377 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5378 (match_operand:VSDQ_I 2 "register_operand" "w")]
5381 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5382 [(set_attr "type" "neon_qadd<q>")]
5385 ;; sqmovn and uqmovn
5387 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5388 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5389 (SAT_TRUNC:<VNARROWQ>
5390 (match_operand:SD_HSDI 1 "register_operand" "w")))]
5392 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5393 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5396 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5397 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5398 (SAT_TRUNC:<VNARROWQ>
5399 (match_operand:VQN 1 "register_operand" "w")))]
5401 "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5402 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5405 (define_insn "aarch64_<su>qxtn2<mode>_le"
5406 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5407 (vec_concat:<VNARROWQ2>
5408 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5409 (SAT_TRUNC:<VNARROWQ>
5410 (match_operand:VQN 2 "register_operand" "w"))))]
5411 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5412 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5413 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5416 (define_insn "aarch64_<su>qxtn2<mode>_be"
5417 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5418 (vec_concat:<VNARROWQ2>
5419 (SAT_TRUNC:<VNARROWQ>
5420 (match_operand:VQN 2 "register_operand" "w"))
5421 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5422 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5423 "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5424 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5427 (define_expand "aarch64_<su>qxtn2<mode>"
5428 [(match_operand:<VNARROWQ2> 0 "register_operand")
5429 (match_operand:<VNARROWQ> 1 "register_operand")
5430 (SAT_TRUNC:<VNARROWQ>
5431 (match_operand:VQN 2 "register_operand"))]
5434 if (BYTES_BIG_ENDIAN)
5435 emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5438 emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5446 (define_insn "aarch64_sqmovun<mode>"
5447 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5448 (truncate:<VNARROWQ>
5451 (match_operand:SD_HSDI 1 "register_operand" "w")
5453 (const_int <half_mask>))))]
5455 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5456 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5459 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5460 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5461 (truncate:<VNARROWQ>
5463 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5464 (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
5465 (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
5467 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5468 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5471 (define_expand "aarch64_sqmovun<mode>"
5472 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5473 (truncate:<VNARROWQ>
5475 (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5480 operands[2] = CONST0_RTX (<MODE>mode);
5482 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5483 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
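;; Per element, SQXTUN clamps a signed value to the unsigned range of the
;; narrow type before truncating; for 16-bit to 8-bit this is the C sketch
;; (illustrative):
;;
;;   uint8_t sqxtun (int16_t x)
;;   {
;;     if (x < 0) return 0;
;;     if (x > 255) return 255;
;;     return (uint8_t) x;
;;   }
;;
;; matching the smax-with-zero / smin-with-mask RTL above.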
5487 (define_insn "aarch64_sqxtun2<mode>_le"
5488 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5489 (vec_concat:<VNARROWQ2>
5490 (match_operand:<VNARROWQ> 1 "register_operand" "0")
5491 (truncate:<VNARROWQ>
5494 (match_operand:VQN 2 "register_operand" "w")
5495 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5496 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
5497 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5498 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5499 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5502 (define_insn "aarch64_sqxtun2<mode>_be"
5503 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5504 (vec_concat:<VNARROWQ2>
5505 (truncate:<VNARROWQ>
5508 (match_operand:VQN 2 "register_operand" "w")
5509 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5510 (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
5511 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5512 "TARGET_SIMD && BYTES_BIG_ENDIAN"
5513 "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5514 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5517 (define_expand "aarch64_sqxtun2<mode>"
5518 [(match_operand:<VNARROWQ2> 0 "register_operand")
5519 (match_operand:<VNARROWQ> 1 "register_operand")
5520 (match_operand:VQN 2 "register_operand")]
5523 rtx zeros = CONST0_RTX (<MODE>mode);
5524 rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5525 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5526 if (BYTES_BIG_ENDIAN)
5527 emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5528 operands[2], zeros, half_umax));
5530 emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5531 operands[2], zeros, half_umax));
5538 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5539 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5541 (match_operand:VSDQ_I 1 "register_operand" "w")))]
5543 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5544 [(set_attr "type" "neon_<optab><q>")]
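
;; An illustrative sketch of the saturating unary operations (not
;; authoritative): SQABS of the most negative value saturates to the most
;; positive one, and SQNEG saturates the same way.  For a 32-bit lane:
;;
;;   int32_t sqabs_s32 (int32_t x)
;;   {
;;     if (x == INT32_MIN)        /* -INT32_MIN is not representable.  */
;;       return INT32_MAX;
;;     return x < 0 ? -x : x;
;;   }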
5549 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5550 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5552 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5553 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5556 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5557 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
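
;; For illustration: per lane, SQDMULH computes sat ((2 * a * b) >> esize)
;; and SQRDMULH additionally adds the rounding constant 1 << (esize - 1)
;; before the shift.  A C sketch for 16-bit lanes (assuming <stdint.h>):
;;
;;   int16_t sqrdmulh_h (int16_t a, int16_t b)
;;   {
;;     int64_t r = (2 * (int64_t) a * b + (1 << 15)) >> 16;
;;     return r > INT16_MAX ? INT16_MAX : (int16_t) r; /* Saturate.  */
;;   }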
5560 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5561 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5563 [(match_operand:VDQHS 1 "register_operand" "w")
5564 (vec_duplicate:VDQHS
5565 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5568 "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5569 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5574 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5575 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5577 [(match_operand:VDQHS 1 "register_operand" "w")
5579 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5580 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5584 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5585 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5586 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5589 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5590 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5592 [(match_operand:VDQHS 1 "register_operand" "w")
5594 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5595 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5599 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5600 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5601 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5604 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5605 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5607 [(match_operand:SD_HSI 1 "register_operand" "w")
5609 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5610 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5614 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5615 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5616 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5619 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5620 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5622 [(match_operand:SD_HSI 1 "register_operand" "w")
5624 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5625 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5629 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5630 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5631 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5636 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5637 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5639 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5640 (match_operand:VSDQ_HSI 2 "register_operand" "w")
5641 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5644 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5645 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5648 ;; sqrdml[as]h_lane.
5650 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5651 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5653 [(match_operand:VDQHS 1 "register_operand" "0")
5654 (match_operand:VDQHS 2 "register_operand" "w")
5656 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5657 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5661 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5663 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5665 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5668 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5669 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5671 [(match_operand:SD_HSI 1 "register_operand" "0")
5672 (match_operand:SD_HSI 2 "register_operand" "w")
5674 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5675 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5679 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5681 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5683 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5686 ;; sqrdml[as]h_laneq.
5688 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5689 [(set (match_operand:VDQHS 0 "register_operand" "=w")
5691 [(match_operand:VDQHS 1 "register_operand" "0")
5692 (match_operand:VDQHS 2 "register_operand" "w")
5694 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5695 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5699 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5701 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5703 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5706 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5707 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5709 [(match_operand:SD_HSI 1 "register_operand" "0")
5710 (match_operand:SD_HSI 2 "register_operand" "w")
5712 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5713 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5717 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5719 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5721 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5726 (define_insn "aarch64_sqdmlal<mode>"
5727 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5731 (sign_extend:<VWIDE>
5732 (match_operand:VSD_HSI 2 "register_operand" "w"))
5733 (sign_extend:<VWIDE>
5734 (match_operand:VSD_HSI 3 "register_operand" "w")))
5736 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5738 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5739 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
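
;; Lane-wise, SQDMLAL saturates both the doubled product and the
;; accumulation; SQDMLSL does likewise for the subtraction.  An illustrative
;; C sketch for 16-bit lanes widening to 32 bits (<stdint.h> assumed):
;;
;;   static int32_t sat32 (int64_t v)
;;   {
;;     return v > INT32_MAX ? INT32_MAX : v < INT32_MIN ? INT32_MIN : v;
;;   }
;;
;;   int32_t sqdmlal_h (int32_t acc, int16_t a, int16_t b)
;;   {
;;     return sat32 ((int64_t) acc + sat32 (2 * (int64_t) a * b));
;;   }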
5742 (define_insn "aarch64_sqdmlsl<mode>"
5743 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5745 (match_operand:<VWIDE> 1 "register_operand" "0")
5748 (sign_extend:<VWIDE>
5749 (match_operand:VSD_HSI 2 "register_operand" "w"))
5750 (sign_extend:<VWIDE>
5751 (match_operand:VSD_HSI 3 "register_operand" "w")))
5754 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5755 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5760 (define_insn "aarch64_sqdmlal_lane<mode>"
5761 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5765 (sign_extend:<VWIDE>
5766 (match_operand:VD_HSI 2 "register_operand" "w"))
5767 (vec_duplicate:<VWIDE>
5768 (sign_extend:<VWIDE_S>
5770 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5771 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5774 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5777 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5779 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5781 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5784 (define_insn "aarch64_sqdmlsl_lane<mode>"
5785 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5787 (match_operand:<VWIDE> 1 "register_operand" "0")
5790 (sign_extend:<VWIDE>
5791 (match_operand:VD_HSI 2 "register_operand" "w"))
5792 (vec_duplicate:<VWIDE>
5793 (sign_extend:<VWIDE_S>
5795 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5796 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5801 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5803 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5805 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5809 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5810 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5812 (match_operand:<VWIDE> 1 "register_operand" "0")
5815 (sign_extend:<VWIDE>
5816 (match_operand:VD_HSI 2 "register_operand" "w"))
5817 (vec_duplicate:<VWIDE>
5818 (sign_extend:<VWIDE_S>
5820 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5821 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5826 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5828 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5830 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5833 (define_insn "aarch64_sqdmlal_laneq<mode>"
5834 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5838 (sign_extend:<VWIDE>
5839 (match_operand:VD_HSI 2 "register_operand" "w"))
5840 (vec_duplicate:<VWIDE>
5841 (sign_extend:<VWIDE_S>
5843 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5844 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5847 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5850 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5852 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5854 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5858 (define_insn "aarch64_sqdmlal_lane<mode>"
5859 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5863 (sign_extend:<VWIDE>
5864 (match_operand:SD_HSI 2 "register_operand" "w"))
5865 (sign_extend:<VWIDE>
5867 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5868 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5871 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5874 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5876 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5878 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5881 (define_insn "aarch64_sqdmlsl_lane<mode>"
5882 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5884 (match_operand:<VWIDE> 1 "register_operand" "0")
5887 (sign_extend:<VWIDE>
5888 (match_operand:SD_HSI 2 "register_operand" "w"))
5889 (sign_extend:<VWIDE>
5891 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5892 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5897 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5899 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5901 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5905 (define_insn "aarch64_sqdmlal_laneq<mode>"
5906 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5910 (sign_extend:<VWIDE>
5911 (match_operand:SD_HSI 2 "register_operand" "w"))
5912 (sign_extend:<VWIDE>
5914 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5915 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5918 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5921 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5923 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5925 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5928 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5929 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5931 (match_operand:<VWIDE> 1 "register_operand" "0")
5934 (sign_extend:<VWIDE>
5935 (match_operand:SD_HSI 2 "register_operand" "w"))
5936 (sign_extend:<VWIDE>
5938 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5939 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5944 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5946 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5948 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5953 (define_insn "aarch64_sqdmlsl_n<mode>"
5954 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5956 (match_operand:<VWIDE> 1 "register_operand" "0")
5959 (sign_extend:<VWIDE>
5960 (match_operand:VD_HSI 2 "register_operand" "w"))
5961 (vec_duplicate:<VWIDE>
5962 (sign_extend:<VWIDE_S>
5963 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5966 "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5967 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5970 (define_insn "aarch64_sqdmlal_n<mode>"
5971 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5975 (sign_extend:<VWIDE>
5976 (match_operand:VD_HSI 2 "register_operand" "w"))
5977 (vec_duplicate:<VWIDE>
5978 (sign_extend:<VWIDE_S>
5979 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5981 (match_operand:<VWIDE> 1 "register_operand" "0")))]
5983 "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5984 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5990 (define_insn "aarch64_sqdmlal2<mode>_internal"
5991 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5995 (sign_extend:<VWIDE>
5997 (match_operand:VQ_HSI 2 "register_operand" "w")
5998 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5999 (sign_extend:<VWIDE>
6001 (match_operand:VQ_HSI 3 "register_operand" "w")
6004 (match_operand:<VWIDE> 1 "register_operand" "0")))]
6006 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
6007 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6010 (define_insn "aarch64_sqdmlsl2<mode>_internal"
6011 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6013 (match_operand:<VWIDE> 1 "register_operand" "0")
6016 (sign_extend:<VWIDE>
6018 (match_operand:VQ_HSI 2 "register_operand" "w")
6019 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6020 (sign_extend:<VWIDE>
6022 (match_operand:VQ_HSI 3 "register_operand" "w")
6026 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
6027 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6030 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
6031 [(match_operand:<VWIDE> 0 "register_operand")
6033 (match_operand:<VWIDE> 1 "register_operand")
6035 (match_operand:VQ_HSI 2 "register_operand")
6036 (match_operand:VQ_HSI 3 "register_operand")]
6039 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6040 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
6041 operands[1], operands[2],
6048 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
6049 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6051 (match_operand:<VWIDE> 1 "register_operand" "0")
6054 (sign_extend:<VWIDE>
6056 (match_operand:VQ_HSI 2 "register_operand" "w")
6057 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6058 (vec_duplicate:<VWIDE>
6059 (sign_extend:<VWIDE_S>
6061 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
6062 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6067 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
6069 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6071 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6074 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
6075 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6079 (sign_extend:<VWIDE>
6081 (match_operand:VQ_HSI 2 "register_operand" "w")
6082 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6083 (vec_duplicate:<VWIDE>
6084 (sign_extend:<VWIDE_S>
6086 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
6087 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6090 (match_operand:<VWIDE> 1 "register_operand" "0")))]
6093 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
6095 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6097 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6100 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
6101 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6103 (match_operand:<VWIDE> 1 "register_operand" "0")
6106 (sign_extend:<VWIDE>
6108 (match_operand:VQ_HSI 2 "register_operand" "w")
6109 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6110 (vec_duplicate:<VWIDE>
6111 (sign_extend:<VWIDE_S>
6113 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
6114 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6119 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
6121 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6123 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6126 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
6127 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6131 (sign_extend:<VWIDE>
6133 (match_operand:VQ_HSI 2 "register_operand" "w")
6134 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6135 (vec_duplicate:<VWIDE>
6136 (sign_extend:<VWIDE_S>
6138 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
6139 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6142 (match_operand:<VWIDE> 1 "register_operand" "0")))]
6145 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
6147 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6149 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6152 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
6153 [(match_operand:<VWIDE> 0 "register_operand")
6155 (match_operand:<VWIDE> 1 "register_operand")
6157 (match_operand:VQ_HSI 2 "register_operand")
6158 (match_operand:<VCOND> 3 "register_operand")
6159 (match_operand:SI 4 "immediate_operand")]
6162 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6163 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
6164 operands[1], operands[2],
6165 operands[3], operands[4], p));
6169 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
6170 [(match_operand:<VWIDE> 0 "register_operand")
6172 (match_operand:<VWIDE> 1 "register_operand")
6174 (match_operand:VQ_HSI 2 "register_operand")
6175 (match_operand:<VCONQ> 3 "register_operand")
6176 (match_operand:SI 4 "immediate_operand")]
6179 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6180 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
6181 operands[1], operands[2],
6182 operands[3], operands[4], p));
6186 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
6187 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6189 (match_operand:<VWIDE> 1 "register_operand" "0")
6192 (sign_extend:<VWIDE>
6194 (match_operand:VQ_HSI 2 "register_operand" "w")
6195 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6196 (vec_duplicate:<VWIDE>
6197 (sign_extend:<VWIDE_S>
6198 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6201 "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6202 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6205 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
6206 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6210 (sign_extend:<VWIDE>
6212 (match_operand:VQ_HSI 2 "register_operand" "w")
6213 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6214 (vec_duplicate:<VWIDE>
6215 (sign_extend:<VWIDE_S>
6216 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6218 (match_operand:<VWIDE> 1 "register_operand" "0")))]
6220 "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6221 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6224 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
6225 [(match_operand:<VWIDE> 0 "register_operand")
6227 (match_operand:<VWIDE> 1 "register_operand")
6229 (match_operand:VQ_HSI 2 "register_operand")
6230 (match_operand:<VEL> 3 "register_operand")]
6233 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6234 emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
6235 operands[1], operands[2],
6242 (define_insn "aarch64_sqdmull<mode>"
6243 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6246 (sign_extend:<VWIDE>
6247 (match_operand:VSD_HSI 1 "register_operand" "w"))
6248 (sign_extend:<VWIDE>
6249 (match_operand:VSD_HSI 2 "register_operand" "w")))
6252 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6253 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
6258 (define_insn "aarch64_sqdmull_lane<mode>"
6259 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6262 (sign_extend:<VWIDE>
6263 (match_operand:VD_HSI 1 "register_operand" "w"))
6264 (vec_duplicate:<VWIDE>
6265 (sign_extend:<VWIDE_S>
6267 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6268 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6273 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6274 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6276 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6279 (define_insn "aarch64_sqdmull_laneq<mode>"
6280 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6283 (sign_extend:<VWIDE>
6284 (match_operand:VD_HSI 1 "register_operand" "w"))
6285 (vec_duplicate:<VWIDE>
6286 (sign_extend:<VWIDE_S>
6288 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6289 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6294 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6295 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6297 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6300 (define_insn "aarch64_sqdmull_lane<mode>"
6301 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6304 (sign_extend:<VWIDE>
6305 (match_operand:SD_HSI 1 "register_operand" "w"))
6306 (sign_extend:<VWIDE>
6308 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6309 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6314 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6315 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6317 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6320 (define_insn "aarch64_sqdmull_laneq<mode>"
6321 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6324 (sign_extend:<VWIDE>
6325 (match_operand:SD_HSI 1 "register_operand" "w"))
6326 (sign_extend:<VWIDE>
6328 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6329 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6334 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6335 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6337 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6342 (define_insn "aarch64_sqdmull_n<mode>"
6343 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6346 (sign_extend:<VWIDE>
6347 (match_operand:VD_HSI 1 "register_operand" "w"))
6348 (vec_duplicate:<VWIDE>
6349 (sign_extend:<VWIDE_S>
6350 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6354 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6355 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6360 (define_insn "aarch64_sqdmull2<mode>_internal"
6361 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6364 (sign_extend:<VWIDE>
6366 (match_operand:VQ_HSI 1 "register_operand" "w")
6367 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6368 (sign_extend:<VWIDE>
6370 (match_operand:VQ_HSI 2 "register_operand" "w")
6375 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6376 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6379 (define_expand "aarch64_sqdmull2<mode>"
6380 [(match_operand:<VWIDE> 0 "register_operand")
6381 (match_operand:VQ_HSI 1 "register_operand")
6382 (match_operand:VQ_HSI 2 "register_operand")]
6385 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6386 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
6393 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6394 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6397 (sign_extend:<VWIDE>
6399 (match_operand:VQ_HSI 1 "register_operand" "w")
6400 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6401 (vec_duplicate:<VWIDE>
6402 (sign_extend:<VWIDE_S>
6404 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6405 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6410 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6411 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6413 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6416 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6417 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6420 (sign_extend:<VWIDE>
6422 (match_operand:VQ_HSI 1 "register_operand" "w")
6423 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6424 (vec_duplicate:<VWIDE>
6425 (sign_extend:<VWIDE_S>
6427 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6428 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6433 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6434 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6436 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6439 (define_expand "aarch64_sqdmull2_lane<mode>"
6440 [(match_operand:<VWIDE> 0 "register_operand")
6441 (match_operand:VQ_HSI 1 "register_operand")
6442 (match_operand:<VCOND> 2 "register_operand")
6443 (match_operand:SI 3 "immediate_operand")]
6446 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6447 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6448 operands[2], operands[3],
6453 (define_expand "aarch64_sqdmull2_laneq<mode>"
6454 [(match_operand:<VWIDE> 0 "register_operand")
6455 (match_operand:VQ_HSI 1 "register_operand")
6456 (match_operand:<VCONQ> 2 "register_operand")
6457 (match_operand:SI 3 "immediate_operand")]
6460 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6461 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6462 operands[2], operands[3],
6469 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6470 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6473 (sign_extend:<VWIDE>
6475 (match_operand:VQ_HSI 1 "register_operand" "w")
6476 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6477 (vec_duplicate:<VWIDE>
6478 (sign_extend:<VWIDE_S>
6479 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6483 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6484 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6487 (define_expand "aarch64_sqdmull2_n<mode>"
6488 [(match_operand:<VWIDE> 0 "register_operand")
6489 (match_operand:VQ_HSI 1 "register_operand")
6490 (match_operand:<VEL> 2 "register_operand")]
6493 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6494 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
6501 (define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
6502 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6504 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6505 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6508 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6509 [(set_attr "type" "neon_shift_reg<q>")]
6515 (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
6516 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6518 [(match_operand:VSDQ_I 1 "register_operand" "w")
6519 (match_operand:VSDQ_I 2 "register_operand" "w")]
6522 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6523 [(set_attr "type" "neon_sat_shift_reg<q>")]
6528 (define_insn "aarch64_<su>shll<mode>"
6529 [(set (match_operand:<VWIDE> 0 "register_operand")
6530 (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6531 (match_operand:VD_BHSI 1 "register_operand"))
6532 (match_operand:<VWIDE> 2
6533 "aarch64_simd_shll_imm_vec")))]
6536 [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6537 [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6539 [(set_attr "type" "neon_shift_imm_long")]
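
;; For illustration: [US]SHLL widens each element and then shifts left by an
;; immediate of at most the source element width, e.g. for an unsigned
;; 16-bit lane (assuming <stdint.h>):
;;
;;   uint32_t ushll_lane (uint16_t x, unsigned shift) /* shift <= 16.  */
;;   {
;;     return (uint32_t) x << shift;
;;   }
;;
;; The D2 alternative above covers the shift-equal-to-element-width case,
;; which uses the plain SHLL mnemonic.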
6542 (define_expand "aarch64_<sur>shll_n<mode>"
6543 [(set (match_operand:<VWIDE> 0 "register_operand")
6544 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
6546 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6550 rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6551 emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
6558 (define_insn "aarch64_<su>shll2<mode>"
6559 [(set (match_operand:<VWIDE> 0 "register_operand")
6560 (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6562 (match_operand:VQW 1 "register_operand")
6563 (match_operand:VQW 2 "vect_par_cnst_hi_half")))
6564 (match_operand:<VWIDE> 3
6565 "aarch64_simd_shll_imm_vec")))]
6567 {@ [cons: =0, 1, 2, 3]
6568 [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6569 [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6571 [(set_attr "type" "neon_shift_imm_long")]
6574 (define_expand "aarch64_<sur>shll2_n<mode>"
6575 [(set (match_operand:<VWIDE> 0 "register_operand")
6576 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
6578 "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6582 rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6583 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6584 emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
6591 (define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
6592 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6596 (<SHIFTEXTEND>:<V2XWIDE>
6597 (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
6598 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6599 (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
6601 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6602 "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6603 [(set_attr "type" "neon_sat_shift_imm<q>")]
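
;; Illustrative only: a rounding shift right adds 1 << (shift - 1) before
;; shifting, and the addition can overflow the element width, which is why
;; the pattern works in the double-width <V2XWIDE> mode.  Roughly, for a
;; signed 32-bit lane (assuming <stdint.h>):
;;
;;   int32_t srshr (int32_t x, unsigned n) /* 1 <= n <= 32.  */
;;   {
;;     return (int32_t) (((int64_t) x + ((int64_t) 1 << (n - 1))) >> n);
;;   }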
6606 (define_expand "aarch64_<sra_op>rshr_n<mode>"
6607 [(match_operand:VSDQ_I_DI 0 "register_operand")
6609 (match_operand:VSDQ_I_DI 1 "register_operand")
6610 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
6613 /* Use this expander to create the rounding constant vector, which is
6614 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6615 RTL is generated when handling the DImode expanders. */
6616 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6617 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6618 rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
6619 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6620 if (VECTOR_MODE_P (<MODE>mode))
6622 shft = gen_const_vec_duplicate (<MODE>mode, shft);
6623 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6626 emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
6634 (define_insn "aarch64_<sur>sra_ndi"
6635 [(set (match_operand:DI 0 "register_operand" "=w")
6636 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6637 (match_operand:DI 2 "register_operand" "w")
6639 "aarch64_simd_shift_imm_offset_di" "i")]
6642 "<sur>sra\\t%d0, %d2, %3"
6643 [(set_attr "type" "neon_shift_acc")]
6648 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6649 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6650 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6651 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6653 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6656 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6657 [(set_attr "type" "neon_shift_imm<q>")]
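
;; For illustration: SLI shifts the source left and inserts the result into
;; the destination, leaving the bits below the shift amount unchanged (SRI
;; is the mirror image from the top).  A sketch for a 32-bit lane:
;;
;;   uint32_t sli_lane (uint32_t dst, uint32_t src, unsigned n) /* n < 32. */
;;   {
;;     uint32_t mask = ~0u << n;        /* Bits produced by the shift.  */
;;     return (dst & ~mask) | ((src << n) & mask);
;;   }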
6662 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6663 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6664 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6666 "aarch64_simd_shift_imm_<ve_mode>" "i")]
6669 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6670 [(set_attr "type" "neon_sat_shift_imm<q>")]
6676 (define_insn "aarch64_<shrn_op>shrn_n<mode>"
6677 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6678 (SAT_TRUNC:<VNARROWQ>
6679 (<TRUNC_SHIFT>:SD_HSDI
6680 (match_operand:SD_HSDI 1 "register_operand" "w")
6681 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6683 "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6684 [(set_attr "type" "neon_shift_imm_narrow_q")]
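
;; A sketch of the lane-wise semantics (illustrative, not authoritative):
;; a saturating narrowing shift right such as SQSHRN shifts first and then
;; clamps to the narrow range, e.g. 32-bit to 16-bit (<stdint.h> assumed):
;;
;;   int16_t sqshrn_s32 (int32_t x, unsigned n) /* 1 <= n <= 16.  */
;;   {
;;     int32_t v = x >> n;
;;     if (v > INT16_MAX) return INT16_MAX;
;;     if (v < INT16_MIN) return INT16_MIN;
;;     return (int16_t) v;
;;   }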
6687 (define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
6688 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6689 (ALL_TRUNC:<VNARROWQ>
6691 (match_operand:VQN 1 "register_operand" "w")
6692 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6693 "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6694 "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6695 [(set_attr "type" "neon_shift_imm_narrow_q")]
6698 (define_expand "aarch64_<shrn_op>shrn_n<mode>"
6699 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6700 (ALL_TRUNC:<VNARROWQ>
6702 (match_operand:VQN 1 "register_operand")
6703 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6706 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6707 INTVAL (operands[2]));
6711 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
6712 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6713 (ALL_TRUNC:<VNARROWQ>
6714 (<TRUNC_SHIFT>:<V2XWIDE>
6716 (<TRUNCEXTEND>:<V2XWIDE>
6717 (match_operand:VQN 1 "register_operand" "w"))
6718 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6719 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6721 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6722 "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6723 [(set_attr "type" "neon_shift_imm_narrow_q")]
6726 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
6727 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6728 (SAT_TRUNC:<VNARROWQ>
6729 (<TRUNC_SHIFT>:<DWI>
6731 (<TRUNCEXTEND>:<DWI>
6732 (match_operand:SD_HSDI 1 "register_operand" "w"))
6733 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6734 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6736 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6737 "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6738 [(set_attr "type" "neon_shift_imm_narrow_q")]
6741 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6742 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6743 (SAT_TRUNC:<VNARROWQ>
6744 (<TRUNC_SHIFT>:<V2XWIDE>
6746 (<TRUNCEXTEND>:<V2XWIDE>
6747 (match_operand:SD_HSDI 1 "register_operand"))
6749 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6752 /* Use this expander to create the rounding constant vector, which is
6753 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6754 RTL is generated when handling the DImode expanders. */
6755 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6756 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6757 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6761 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6762 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6763 (ALL_TRUNC:<VNARROWQ>
6764 (<TRUNC_SHIFT>:<V2XWIDE>
6766 (<TRUNCEXTEND>:<V2XWIDE>
6767 (match_operand:VQN 1 "register_operand"))
6769 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6772 if (<CODE> == TRUNCATE
6773 && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6775 rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
6776 emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
6779 /* Use this expander to create the rounding constant vector, which is
6780 1 << (shift - 1). Use wide_int here to ensure that the right TImode
6781 RTL is generated when handling the DImode expanders. */
6782 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6783 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6784 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6785 operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6786 operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6790 (define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
6791 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6792 (truncate:<VNARROWQ>
6796 (match_operand:VQN 1 "register_operand" "w")
6797 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6798 (match_operand:VQN 3 "aarch64_simd_imm_zero"))
6799 (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
6801 "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6802 [(set_attr "type" "neon_shift_imm_narrow_q")]
6805 (define_insn "aarch64_sqshrun_n<mode>_insn"
6806 [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
6810 (match_operand:SD_HSDI 1 "register_operand" "w")
6811 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6813 (const_int <half_mask>)))]
6815 "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6816 [(set_attr "type" "neon_shift_imm_narrow_q")]
6819 (define_expand "aarch64_sqshrun_n<mode>"
6820 [(match_operand:<VNARROWQ> 0 "register_operand")
6821 (match_operand:SD_HSDI 1 "register_operand")
6822 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6825 rtx dst = gen_reg_rtx (<MODE>mode);
6826 emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
6828 emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6833 (define_expand "aarch64_sqshrun_n<mode>"
6834 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6835 (truncate:<VNARROWQ>
6839 (match_operand:VQN 1 "register_operand")
6840 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6845 operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6846 INTVAL (operands[2]));
6847 operands[3] = CONST0_RTX (<MODE>mode);
6849 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6850 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6854 (define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
6855 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6856 (truncate:<VNARROWQ>
6861 (sign_extend:<V2XWIDE>
6862 (match_operand:VQN 1 "register_operand" "w"))
6863 (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6864 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6865 (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
6866 (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
6868 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6869 "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6870 [(set_attr "type" "neon_shift_imm_narrow_q")]
6873 (define_insn "aarch64_sqrshrun_n<mode>_insn"
6874 [(set (match_operand:<DWI> 0 "register_operand" "=w")
6880 (match_operand:SD_HSDI 1 "register_operand" "w"))
6881 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6882 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6884 (const_int <half_mask>)))]
6886 && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6887 "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6888 [(set_attr "type" "neon_shift_imm_narrow_q")]
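
;; Illustrative C for the combined operation modelled above and expanded
;; below: a rounding shift right followed by unsigned saturation of the
;; signed result, e.g. 32-bit to 16-bit lanes (<stdint.h> assumed):
;;
;;   uint16_t sqrshrun_s32 (int32_t x, unsigned n) /* 1 <= n <= 16.  */
;;   {
;;     int64_t v = ((int64_t) x + ((int64_t) 1 << (n - 1))) >> n;
;;     if (v < 0) return 0;
;;     if (v > UINT16_MAX) return UINT16_MAX;
;;     return (uint16_t) v;
;;   }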
6891 (define_expand "aarch64_sqrshrun_n<mode>"
6892 [(match_operand:<VNARROWQ> 0 "register_operand")
6893 (match_operand:SD_HSDI 1 "register_operand")
6894 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6897 int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
6898 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6899 rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
6900 rtx dst = gen_reg_rtx (<DWI>mode);
6901 emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
6902 emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6907 (define_expand "aarch64_sqrshrun_n<mode>"
6908 [(set (match_operand:<VNARROWQ> 0 "register_operand")
6909 (truncate:<VNARROWQ>
6914 (sign_extend:<V2XWIDE>
6915 (match_operand:VQN 1 "register_operand"))
6917 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6922 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6923 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6924 operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6925 operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6926 operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6927 operands[4] = CONST0_RTX (<V2XWIDE>mode);
6929 = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
6930 operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
6934 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
6935 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6936 (vec_concat:<VNARROWQ2>
6937 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6938 (ALL_TRUNC:<VNARROWQ>
6940 (match_operand:VQN 2 "register_operand" "w")
6941 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6942 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6943 && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6944 "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6945 [(set_attr "type" "neon_shift_imm_narrow_q")]
6948 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
6949 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6950 (vec_concat:<VNARROWQ2>
6951 (ALL_TRUNC:<VNARROWQ>
6953 (match_operand:VQN 2 "register_operand" "w")
6954 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6955 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6956 "TARGET_SIMD && BYTES_BIG_ENDIAN
6957 && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6958 "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6959 [(set_attr "type" "neon_shift_imm_narrow_q")]
6962 (define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
6963 [(match_operand:<VNARROWQ2> 0 "register_operand")
6964 (match_operand:<VNARROWQ> 1 "register_operand")
6965 (ALL_TRUNC:<VNARROWQ>
6966 (SHIFTRT:VQN (match_operand:VQN 2 "register_operand")))
6967 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6968 "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6970 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6971 INTVAL (operands[3]));
6973 if (BYTES_BIG_ENDIAN)
6974 emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
6975 operands[0], operands[1], operands[2], operands[3]));
6977 emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
6978 operands[0], operands[1], operands[2], operands[3]));
6983 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
6984 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6985 (vec_concat:<VNARROWQ2>
6986 (match_operand:<VNARROWQ> 1 "register_operand" "0")
6987 (ALL_TRUNC:<VNARROWQ>
6988 (<TRUNC_SHIFT>:<V2XWIDE>
6990 (<TRUNCEXTEND>:<V2XWIDE>
6991 (match_operand:VQN 2 "register_operand" "w"))
6992 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6993 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6994 "TARGET_SIMD && !BYTES_BIG_ENDIAN
6995 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6996 "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6997 [(set_attr "type" "neon_shift_imm_narrow_q")]
7000 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
7001 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7002 (vec_concat:<VNARROWQ2>
7003 (ALL_TRUNC:<VNARROWQ>
7004 (<TRUNC_SHIFT>:<V2XWIDE>
7006 (<TRUNCEXTEND>:<V2XWIDE>
7007 (match_operand:VQN 2 "register_operand" "w"))
7008 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7009 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
7010 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7011 "TARGET_SIMD && BYTES_BIG_ENDIAN
7012 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7013 "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7014 [(set_attr "type" "neon_shift_imm_narrow_q")]
7017 (define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
7018 [(match_operand:<VNARROWQ2> 0 "register_operand")
7019 (match_operand:<VNARROWQ> 1 "register_operand")
7020 (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
7021 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7024 if (<CODE> == TRUNCATE
7025 && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
7027 rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
7028 emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
7032 /* Use this expander to create the rounding constant vector, which is
7033 1 << (shift - 1). Use wide_int here to ensure that the right TImode
7034 RTL is generated when handling the DImode expanders. */
7035 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7036 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7037 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7038 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7039 operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7040 if (BYTES_BIG_ENDIAN)
7041 emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
7047 emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
7056 (define_insn "aarch64_sqshrun2_n<mode>_insn_le"
7057 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7058 (vec_concat:<VNARROWQ2>
7059 (match_operand:<VNARROWQ> 1 "register_operand" "0")
7060 (truncate:<VNARROWQ>
7064 (match_operand:VQN 2 "register_operand" "w")
7065 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7066 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
7067 (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
7068 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
7069 "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7070 [(set_attr "type" "neon_shift_imm_narrow_q")]
7073 (define_insn "aarch64_sqshrun2_n<mode>_insn_be"
7074 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7075 (vec_concat:<VNARROWQ2>
7076 (truncate:<VNARROWQ>
7080 (match_operand:VQN 2 "register_operand" "w")
7081 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7082 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
7083 (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
7084 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7085 "TARGET_SIMD && BYTES_BIG_ENDIAN"
7086 "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7087 [(set_attr "type" "neon_shift_imm_narrow_q")]
7090 (define_expand "aarch64_sqshrun2_n<mode>"
7091 [(match_operand:<VNARROWQ2> 0 "register_operand")
7092 (match_operand:<VNARROWQ> 1 "register_operand")
7093 (match_operand:VQN 2 "register_operand")
7094 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7097 operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
7098 INTVAL (operands[3]));
7099 rtx zeros = CONST0_RTX (<MODE>mode);
7101 = aarch64_simd_gen_const_vector_dup (<MODE>mode,
7102 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7103 if (BYTES_BIG_ENDIAN)
7104 emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
7105 operands[1], operands[2], operands[3],
7108 emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
7109 operands[1], operands[2], operands[3],
7115 (define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
7116 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7117 (vec_concat:<VNARROWQ2>
7118 (match_operand:<VNARROWQ> 1 "register_operand" "0")
7119 (truncate:<VNARROWQ>
7124 (sign_extend:<V2XWIDE>
7125 (match_operand:VQN 2 "register_operand" "w"))
7126 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7127 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7128 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
7129 (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
7130 "TARGET_SIMD && !BYTES_BIG_ENDIAN
7131 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7132 "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7133 [(set_attr "type" "neon_shift_imm_narrow_q")]
7136 (define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
7137 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7138 (vec_concat:<VNARROWQ2>
7139 (truncate:<VNARROWQ>
7144 (sign_extend:<V2XWIDE>
7145 (match_operand:VQN 2 "register_operand" "w"))
7146 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7147 (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7148 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
7149 (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
7150 (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7151 "TARGET_SIMD && BYTES_BIG_ENDIAN
7152 && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7153 "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7154 [(set_attr "type" "neon_shift_imm_narrow_q")]
7157 (define_expand "aarch64_sqrshrun2_n<mode>"
7158 [(match_operand:<VNARROWQ2> 0 "register_operand")
7159 (match_operand:<VNARROWQ> 1 "register_operand")
7160 (match_operand:VQN 2 "register_operand")
7161 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7164 int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7165 wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7166 rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7167 rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7168 rtx zero = CONST0_RTX (<V2XWIDE>mode);
7170 = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
7171 GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7172 operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7173 if (BYTES_BIG_ENDIAN)
7174 emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
7175 operands[1], operands[2], operands[3], rnd,
7178 emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
7179 operands[1], operands[2], operands[3], rnd,
7185 ;; cm(eq|ge|gt|lt|le)
7186 ;; Note: we have constraints for Dz and Z, as different expanders
7187 ;; have different ideas of what should be passed to this pattern.
7189 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7190 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7192 (COMPARISONS:<V_INT_EQUIV>
7193 (match_operand:VDQ_I 1 "register_operand")
7194 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")
7197 {@ [ cons: =0 , 1 , 2 ; attrs: type ]
7198 [ w , w , w ; neon_compare<q> ] cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7199 [ w , w , ZDz ; neon_compare_zero<q> ] cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0
7203 (define_insn_and_split "aarch64_cm<optab>di"
7204 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
7207 (match_operand:DI 1 "register_operand" "w,w,r")
7208 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
7210 (clobber (reg:CC CC_REGNUM))]
7213 "&& reload_completed"
7214 [(set (match_operand:DI 0 "register_operand")
7217 (match_operand:DI 1 "register_operand")
7218 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7221 /* If we are in the general-purpose register file,
7222 we split into a comparison followed by a store.  */
7223 if (GP_REGNUM_P (REGNO (operands[0]))
7224 && GP_REGNUM_P (REGNO (operands[1])))
7226 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
7227 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7228 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7229 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7232 /* Otherwise, we expand to a similar pattern that does not
7233 clobber CC_REGNUM. */
7235 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
7238 (define_insn "*aarch64_cm<optab>di"
7239 [(set (match_operand:DI 0 "register_operand")
7242 (match_operand:DI 1 "register_operand")
7243 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7245 "TARGET_SIMD && reload_completed"
7246 {@ [ cons: =0 , 1 , 2 ; attrs: type ]
7247 [ w , w , w ; neon_compare ] cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
7248 [ w , w , ZDz ; neon_compare_zero ] cm<optab>\t%d0, %d1, #0
7254 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7255 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7257 (UCOMPARISONS:<V_INT_EQUIV>
7258 (match_operand:VDQ_I 1 "register_operand" "w")
7259 (match_operand:VDQ_I 2 "register_operand" "w")
7262 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7263 [(set_attr "type" "neon_compare<q>")]
7266 (define_insn_and_split "aarch64_cm<optab>di"
7267 [(set (match_operand:DI 0 "register_operand" "=w,r")
7270 (match_operand:DI 1 "register_operand" "w,r")
7271 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
7273 (clobber (reg:CC CC_REGNUM))]
7276 "&& reload_completed"
7277 [(set (match_operand:DI 0 "register_operand")
7280 (match_operand:DI 1 "register_operand")
7281 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7284 /* If we are in the general-purpose register file,
7285 we split into a comparison followed by a store.  */
7286 if (GP_REGNUM_P (REGNO (operands[0]))
7287 && GP_REGNUM_P (REGNO (operands[1])))
7289 machine_mode mode = CCmode;
7290 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7291 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7292 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7295 /* Otherwise, we expand to a similar pattern that does not
7296 clobber CC_REGNUM. */
7298 [(set_attr "type" "neon_compare,multiple")]
7301 (define_insn "*aarch64_cm<optab>di"
7302 [(set (match_operand:DI 0 "register_operand" "=w")
7305 (match_operand:DI 1 "register_operand" "w")
7306 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
7308 "TARGET_SIMD && reload_completed"
7309 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
7310 [(set_attr "type" "neon_compare")]
7315 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
7316 ;; we don't have any insns using ne, and aarch64_vcond outputs
7317 ;; not (neg (eq (and x y) 0)),
7318 ;; which is rewritten by simplify_rtx as
7319 ;; plus (eq (and x y) 0) -1.
7321 (define_insn "aarch64_cmtst<mode><vczle><vczbe>"
7322 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7326 (match_operand:VDQ_I 1 "register_operand" "w")
7327 (match_operand:VDQ_I 2 "register_operand" "w"))
7328 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
7329 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
7332 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7333 [(set_attr "type" "neon_tst<q>")]
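
;; Lane-wise, CMTST is roughly the following C (for illustration); note
;; that ((a & b) == 0) - 1 is exactly the plus (eq ...) -1 form matched
;; above:
;;
;;   int32_t cmtst_lane (int32_t a, int32_t b)
;;   {
;;     return (a & b) != 0 ? -1 : 0; /* == ((a & b) == 0) - 1.  */
;;   }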
7336 ;; A cmtst can also arise when combining a
7337 ;; not (ne (eq x 0)), in which case it is rewritten as
7338 ;; a comparison of the operand against itself.
7340 (define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
7341 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7344 (match_operand:VDQ_I 1 "register_operand" "w")
7345 (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
7346 (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
7349 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
7350 [(set_attr "type" "neon_tst<q>")]
7353 (define_insn_and_split "aarch64_cmtstdi"
7354 [(set (match_operand:DI 0 "register_operand" "=w,r")
7358 (match_operand:DI 1 "register_operand" "w,r")
7359 (match_operand:DI 2 "register_operand" "w,r"))
7361 (clobber (reg:CC CC_REGNUM))]
7364 "&& reload_completed"
7365 [(set (match_operand:DI 0 "register_operand")
7369 (match_operand:DI 1 "register_operand")
7370 (match_operand:DI 2 "register_operand"))
7373 /* If we are in the general-purpose register file,
7374 we split into a comparison followed by a store.  */
7375 if (GP_REGNUM_P (REGNO (operands[0]))
7376 && GP_REGNUM_P (REGNO (operands[1])))
7378 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
7379 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
7380 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
7381 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
7382 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7385 /* Otherwise, we expand to a similar pattern that does not
7386 clobber CC_REGNUM. */
7388 [(set_attr "type" "neon_tst,multiple")]
7391 (define_insn "*aarch64_cmtstdi<vczle><vczbe>"
7392 [(set (match_operand:DI 0 "register_operand" "=w")
7396 (match_operand:DI 1 "register_operand" "w")
7397 (match_operand:DI 2 "register_operand" "w"))
7400 "cmtst\t%d0, %d1, %d2"
7401 [(set_attr "type" "neon_tst")]
7404 ;; fcm(eq|ge|gt|le|lt)
7406 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7407 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7409 (COMPARISONS:<V_INT_EQUIV>
7410 (match_operand:VHSDF_HSDF 1 "register_operand")
7411 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero")
7414 {@ [ cons: =0 , 1 , 2 ]
7415 [ w , w , w ] fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7416 [ w , w , YDz ] fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0
7418 [(set_attr "type" "neon_fp_compare_<stype><q>")]
7422 ;; Note we can also handle what would be fac(le|lt) by
7423 ;; generating fac(ge|gt).
7425 (define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
7426 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7428 (FAC_COMPARISONS:<V_INT_EQUIV>
7430 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
7432 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
7435 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7436 [(set_attr "type" "neon_fp_compare_<stype><q>")]
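
;; For illustration, one lane of FACGE computes the following (FACGT uses >
;; instead); fabsf here stands in for the abs RTL on 32-bit float lanes:
;;
;;   uint32_t facge_lane (float a, float b)
;;   {
;;     return fabsf (a) >= fabsf (b) ? 0xffffffffu : 0u;
;;   }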
7441 ;; ADDP with two registers semantically concatenates them and performs
7442 ;; a pairwise addition on the result.  For 128-bit input modes we represent
7443 ;; this as a concatenation of the pairwise-addition results of the two
7444 ;; input registers.  This allows us to avoid using intermediate 256-bit modes.
7445 (define_insn "aarch64_addp<mode>_insn"
7446 [(set (match_operand:VQ_I 0 "register_operand" "=w")
7450 (match_operand:VQ_I 1 "register_operand" "w")
7451 (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
7454 (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
7457 (match_operand:VQ_I 2 "register_operand" "w")
7462 "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7463 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7464 [(set_attr "type" "neon_reduc_add<q>")]
7467 ;; For 64-bit input modes an ADDP is represented as a concatenation
7468 ;; of the input registers into a 128-bit register, which is then fed
7469 ;; into a pairwise add. That way we avoid having to create intermediate
7470 ;; 32-bit vector modes.
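;; E.g. (V2SI, illustrative): for inputs { a0, a1 } and { b0, b1 } the
;; result is { a0+a1, b0+b1 }.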
7471 (define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
7472 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
7476 (match_operand:VD_BHSI 1 "register_operand" "w")
7477 (match_operand:VD_BHSI 2 "register_operand" "w"))
7478 (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
7483 (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
7484 "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7485 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7486 [(set_attr "type" "neon_reduc_add<q>")]
7489 ;; A common use case of 64-bit ADDP is to have both operands come from the same
7490 ;; 128-bit vector and produce the pairwise addition results in the lower half.
7491 ;; Split into the 128-bit ADDP form and extract the low half.
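;; A minimal C-level sketch of this transformation, using ACLE
;; intrinsics (illustrative only, not how the compiler itself does it):
;;   int32x2_t f (int32x4_t x)
;;   {
;;     return vget_low_s32 (vpaddq_s32 (x, x));
;;   }
;; The low half of the 128-bit ADDP already holds every pairwise sum
;; of x, so only the cheap low-half extraction remains.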
7492 (define_insn_and_split "*aarch64_addp_same_reg<mode>"
7493 [(set (match_operand:<VHALF> 0 "register_operand" "=w")
7496 (match_operand:VQ_I 1 "register_operand" "w")
7497 (match_operand:VQ_I 2 "vect_par_cnst_even_or_odd_half"))
7500 (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))))]
7501 "TARGET_SIMD && !rtx_equal_p (operands[2], operands[3])"
7507 if (can_create_pseudo_p ())
7508 scratch = gen_reg_rtx (<MODE>mode);
7510 scratch = lowpart_subreg (<MODE>mode, operands[0], <VHALF>mode);
7512 emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[1], operands[1],
7513 operands[2], operands[3]));
7514 emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, scratch));
7519 (define_expand "aarch64_addp<mode>"
7520 [(match_operand:VDQ_I 0 "register_operand")
7521 (match_operand:VDQ_I 1 "register_operand")
7522 (match_operand:VDQ_I 2 "register_operand")]
7525 int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
7526 if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
7528 rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
7529 rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
7530 emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
7531 operands[2], par_even, par_odd));
7538 (define_expand "sqrt<mode>2"
7539 [(set (match_operand:VHSDF 0 "register_operand")
7540 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
7543 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
7547 (define_insn "*sqrt<mode>2<vczle><vczbe>"
7548 [(set (match_operand:VHSDF 0 "register_operand" "=w")
7549 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
7551 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
7552 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
7555 ;; Patterns for vector struct loads and stores.
7557 (define_insn "aarch64_simd_ld2<vstruct_elt>"
7558 [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
7559 (unspec:VSTRUCT_2Q [
7560 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
7563 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7564 [(set_attr "type" "neon_load2_2reg<q>")]
7567 (define_insn "@aarch64_simd_ld2r<vstruct_elt>"
7568 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7569 (unspec:VSTRUCT_2QD [
7570 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7573 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7574 [(set_attr "type" "neon_load2_all_lanes<q>")]
7577 (define_insn "@aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7578 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7579 (unspec:VSTRUCT_2QD [
7580 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7581 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
7582 (match_operand:SI 3 "immediate_operand" "i")]
7586 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7587 INTVAL (operands[3]));
7588 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
7590 [(set_attr "type" "neon_load2_one_lane")]
7593 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7594 [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
7595 (unspec:VSTRUCT_2Q [
7596 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
7600 if (BYTES_BIG_ENDIAN)
7602 rtx tmp = gen_reg_rtx (<MODE>mode);
7603 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7604 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7605 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
7606 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7609 emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
7613 (define_insn "aarch64_simd_st2<vstruct_elt>"
7614 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
7615 (unspec:VSTRUCT_2Q [
7616 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
7619 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7620 [(set_attr "type" "neon_store2_2reg<q>")]
7623 ;; RTL uses GCC vector extension indices, so flip only for assembly.
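;; E.g. (illustrative): with four elements per vector on big-endian,
;; GCC lane 0 is architectural lane 3, so aarch64_endian_lane_rtx
;; rewrites the printed index to nunits - 1 - lane.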
7624 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7625 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7626 (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
7627 (match_operand:SI 2 "immediate_operand" "i")]
7631 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7632 INTVAL (operands[2]));
7633 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
7635 [(set_attr "type" "neon_store2_one_lane<q>")]
7638 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7639 [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
7640 (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
7644 if (BYTES_BIG_ENDIAN)
7646 rtx tmp = gen_reg_rtx (<MODE>mode);
7647 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7648 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7649 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7650 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
7653 emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
7657 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7658 [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
7659 (unspec:VSTRUCT_3Q [
7660 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
7663 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7664 [(set_attr "type" "neon_load3_3reg<q>")]
7667 (define_insn "@aarch64_simd_ld3r<vstruct_elt>"
7668 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7669 (unspec:VSTRUCT_3QD [
7670 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7673 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7674 [(set_attr "type" "neon_load3_all_lanes<q>")]
7677 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7678 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7679 (unspec:VSTRUCT_3QD [
7680 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7681 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
7682 (match_operand:SI 3 "immediate_operand" "i")]
7686 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7687 INTVAL (operands[3]));
7688 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
7690 [(set_attr "type" "neon_load3_one_lane")]
7693 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7694 [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
7695 (unspec:VSTRUCT_3Q [
7696 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
7700 if (BYTES_BIG_ENDIAN)
7702 rtx tmp = gen_reg_rtx (<MODE>mode);
7703 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7704 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7705 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
7706 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7709 emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
7713 (define_insn "aarch64_simd_st3<vstruct_elt>"
7714 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
7715 (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
7718 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7719 [(set_attr "type" "neon_store3_3reg<q>")]
7722 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7723 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7724 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7725 (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
7726 (match_operand:SI 2 "immediate_operand" "i")]
7730 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7731 INTVAL (operands[2]));
7732 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
7734 [(set_attr "type" "neon_store3_one_lane<q>")]
7737 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7738 [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
7739 (unspec:VSTRUCT_3Q [
7740 (match_operand:VSTRUCT_3Q 1 "register_operand")]
7744 if (BYTES_BIG_ENDIAN)
7746 rtx tmp = gen_reg_rtx (<MODE>mode);
7747 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7748 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7749 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7750 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
7753 emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
7757 (define_insn "aarch64_simd_ld4<vstruct_elt>"
7758 [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
7759 (unspec:VSTRUCT_4Q [
7760 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
7763 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7764 [(set_attr "type" "neon_load4_4reg<q>")]
7767 (define_insn "@aarch64_simd_ld4r<vstruct_elt>"
7768 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7769 (unspec:VSTRUCT_4QD [
7770 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7773 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7774 [(set_attr "type" "neon_load4_all_lanes<q>")]
7777 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7778 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7779 (unspec:VSTRUCT_4QD [
7780 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7781 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7782 (match_operand:SI 3 "immediate_operand" "i")]
7786 operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7787 INTVAL (operands[3]));
7788 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7790 [(set_attr "type" "neon_load4_one_lane")]
7793 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7794 [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7795 (unspec:VSTRUCT_4Q [
7796 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7800 if (BYTES_BIG_ENDIAN)
7802 rtx tmp = gen_reg_rtx (<MODE>mode);
7803 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7804 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7805 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7806 emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7809 emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7813 (define_insn "aarch64_simd_st4<vstruct_elt>"
7814 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7815 (unspec:VSTRUCT_4Q [
7816 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7819 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7820 [(set_attr "type" "neon_store4_4reg<q>")]
7823 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7824 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7825 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7826 (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7827 (match_operand:SI 2 "immediate_operand" "i")]
7831 operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7832 INTVAL (operands[2]));
7833 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7835 [(set_attr "type" "neon_store4_one_lane<q>")]
7838 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7839 [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7840 (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7844 if (BYTES_BIG_ENDIAN)
7846 rtx tmp = gen_reg_rtx (<MODE>mode);
7847 rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7848 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7849 emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7850 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7853 emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7857 ;; Patterns for rcpc3 vector lane loads and stores.
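;; E.g. (illustrative): "ldap1 {v0.d}[1], [x0]" is a load-acquire of a
;; single 64-bit lane and "stl1 {v0.d}[1], [x0]" the matching
;; store-release, as introduced by FEAT_LRCPC3.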
7859 (define_insn "aarch64_vec_stl1_lanes<mode>_lane<Vel>"
7860 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q")
7861 (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w")
7862 (match_operand:SI 2 "immediate_operand" "i")]
7866 operands[2] = aarch64_endian_lane_rtx (<MODE>mode,
7867 INTVAL (operands[2]));
7868 return "stl1\\t{%S1.<Vetype>}[%2], %0";
7870 [(set_attr "type" "neon_store2_one_lane")]
7873 (define_expand "aarch64_vec_stl1_lane<mode>"
7874 [(match_operand:DI 0 "register_operand")
7875 (match_operand:V12DIF 1 "register_operand")
7876 (match_operand:SI 2 "immediate_operand")]
7879 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
7880 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7882 aarch64_simd_lane_bounds (operands[2], 0,
7883 GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7884 emit_insn (gen_aarch64_vec_stl1_lanes<mode>_lane<Vel> (mem,
7885 operands[1], operands[2]));
7889 (define_insn "aarch64_vec_ldap1_lanes<mode>_lane<Vel>"
7890 [(set (match_operand:V12DIF 0 "register_operand" "=w")
7892 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q")
7893 (match_operand:V12DIF 2 "register_operand" "0")
7894 (match_operand:SI 3 "immediate_operand" "i")]
7895 UNSPEC_LDAP1_LANE))]
7898 operands[3] = aarch64_endian_lane_rtx (<MODE>mode,
7899 INTVAL (operands[3]));
7900 return "ldap1\\t{%S0.<Vetype>}[%3], %1";
7902 [(set_attr "type" "neon_load2_one_lane")]
7905 (define_expand "aarch64_vec_ldap1_lane<mode>"
7906 [(match_operand:V12DIF 0 "register_operand")
7907 (match_operand:DI 1 "register_operand")
7908 (match_operand:V12DIF 2 "register_operand")
7909 (match_operand:SI 3 "immediate_operand")]
7912 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7913 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7915 aarch64_simd_lane_bounds (operands[3], 0,
7916 GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7917 emit_insn (gen_aarch64_vec_ldap1_lanes<mode>_lane<Vel> (operands[0],
7918 mem, operands[2], operands[3]));
7922 (define_insn_and_split "aarch64_rev_reglist<mode>"
7923 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7925 [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7926 (match_operand:V16QI 2 "register_operand" "w")]
7927 UNSPEC_REV_REGLIST))]
7930 "&& reload_completed"
7934 int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7935 for (i = 0; i < nregs; i++)
7937 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7938 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7939 emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7943 [(set_attr "type" "neon_tbl1_q")
7944 (set_attr "length" "<insn_count>")]
7947 ;; Reload patterns for AdvSIMD register list operands.
7949 (define_expand "mov<mode>"
7950 [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7951 (match_operand:VSTRUCT_QD 1 "general_operand"))]
7954 if (known_eq (GET_MODE_SIZE (<MODE>mode), 16)
7955 && operands[1] == CONST0_RTX (<MODE>mode)
7956 && MEM_P (operands[0])
7957 && (can_create_pseudo_p ()
7958 || memory_address_p (TImode, XEXP (operands[0], 0))))
7960 operands[0] = adjust_address (operands[0], TImode, 0);
7961 operands[1] = CONST0_RTX (TImode);
7963 else if (can_create_pseudo_p ())
7965 if (GET_CODE (operands[0]) != REG)
7966 operands[1] = force_reg (<MODE>mode, operands[1]);
7970 (define_expand "mov<mode>"
7971 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7972 (match_operand:VSTRUCT 1 "general_operand"))]
7975 if (can_create_pseudo_p ())
7977 if (GET_CODE (operands[0]) != REG)
7978 operands[1] = force_reg (<MODE>mode, operands[1]);
7982 (define_expand "movv8di"
7983 [(set (match_operand:V8DI 0 "nonimmediate_operand")
7984 (match_operand:V8DI 1 "general_operand"))]
7987 if (can_create_pseudo_p () && MEM_P (operands[0]))
7988 operands[1] = force_reg (V8DImode, operands[1]);
7991 (define_expand "@aarch64_ld1x3<vstruct_elt>"
7992 [(match_operand:VSTRUCT_3QD 0 "register_operand")
7993 (match_operand:DI 1 "register_operand")]
7996 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7997 emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
8001 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
8002 [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
8004 [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
8007 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8008 [(set_attr "type" "neon_load1_3reg<q>")]
8011 (define_expand "@aarch64_ld1x4<vstruct_elt>"
8012 [(match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
8013 (match_operand:DI 1 "register_operand" "r")]
8016 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8017 emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
8021 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
8022 [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
8024 [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
8027 "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8028 [(set_attr "type" "neon_load1_4reg<q>")]
8031 (define_expand "@aarch64_st1x2<vstruct_elt>"
8032 [(match_operand:DI 0 "register_operand")
8033 (match_operand:VSTRUCT_2QD 1 "register_operand")]
8036 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8037 emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
8041 (define_insn "aarch64_st1_x2_<vstruct_elt>"
8042 [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
8044 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
8047 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8048 [(set_attr "type" "neon_store1_2reg<q>")]
8051 (define_expand "@aarch64_st1x3<vstruct_elt>"
8052 [(match_operand:DI 0 "register_operand")
8053 (match_operand:VSTRUCT_3QD 1 "register_operand")]
8056 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8057 emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
8061 (define_insn "aarch64_st1_x3_<vstruct_elt>"
8062 [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
8064 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
8067 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8068 [(set_attr "type" "neon_store1_3reg<q>")]
8071 (define_expand "@aarch64_st1x4<vstruct_elt>"
8072 [(match_operand:DI 0 "register_operand" "")
8073 (match_operand:VSTRUCT_4QD 1 "register_operand" "")]
8076 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8077 emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
8081 (define_insn "aarch64_st1_x4_<vstruct_elt>"
8082 [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
8084 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
8087 "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8088 [(set_attr "type" "neon_store1_4reg<q>")]
8091 (define_insn "*aarch64_movv8di"
8092 [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
8093 (match_operand:V8DI 1 "general_operand" " r,r,m"))]
8094 "(register_operand (operands[0], V8DImode)
8095 || register_operand (operands[1], V8DImode))"
8097 [(set_attr "type" "multiple,multiple,multiple")
8098 (set_attr "length" "32,16,16")]
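;; Length accounting (illustrative): the register-to-register form
;; splits into eight x-register moves (32 bytes), while in the usual
;; unaligned-capable case each memory form splits into four
;; load/store-pair instructions (16 bytes).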
8101 (define_insn "aarch64_be_ld1<mode>"
8102 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
8103 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
8104 "aarch64_simd_struct_operand" "Utv")]
8107 "ld1\\t{%0<Vmtype>}, %1"
8108 [(set_attr "type" "neon_load1_1reg<q>")]
8111 (define_insn "aarch64_be_st1<mode>"
8112 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
8113 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
8116 "st1\\t{%1<Vmtype>}, %0"
8117 [(set_attr "type" "neon_store1_1reg<q>")]
8120 (define_insn "*aarch64_mov<mode>"
8121 [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
8122 (match_operand:VSTRUCT_2D 1 "general_operand"))]
8124 && (register_operand (operands[0], <MODE>mode)
8125 || register_operand (operands[1], <MODE>mode))"
8126 {@ [ cons: =0 , 1 ; attrs: type , length ]
8127 [ w , w ; multiple , 8 ] #
8128 [ m , w ; neon_stp , 4 ] stp\t%d1, %R1, %0
8129 [ w , m ; neon_ldp , 4 ] ldp\t%d0, %R0, %1
8133 (define_insn "*aarch64_mov<mode>"
8134 [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
8135 (match_operand:VSTRUCT_2Q 1 "general_operand"))]
8137 && (register_operand (operands[0], <MODE>mode)
8138 || register_operand (operands[1], <MODE>mode))"
8139 {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
8140 [ w , w ; multiple , simd , 8 ] #
8141 [ m , w ; neon_stp_q , * , 4 ] stp\t%q1, %R1, %0
8142 [ w , m ; neon_ldp_q , * , 4 ] ldp\t%q0, %R0, %1
8146 (define_insn "*aarch64_movoi"
8147 [(set (match_operand:OI 0 "nonimmediate_operand")
8148 (match_operand:OI 1 "general_operand"))]
8150 && (register_operand (operands[0], OImode)
8151 || register_operand (operands[1], OImode))"
8152 {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
8153 [ w , w ; multiple , simd , 8 ] #
8154 [ m , w ; neon_stp_q , * , 4 ] stp\t%q1, %R1, %0
8155 [ w , m ; neon_ldp_q , * , 4 ] ldp\t%q0, %R0, %1
8159 (define_insn "*aarch64_mov<mode>"
8160 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
8161 (match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
8163 && (register_operand (operands[0], <MODE>mode)
8164 || register_operand (operands[1], <MODE>mode))"
8166 [(set_attr "type" "multiple")
8167 (set_attr "arch" "fp<q>,*,*")
8168 (set_attr "length" "12,8,8")]
8171 (define_insn "*aarch64_movci"
8172 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
8173 (match_operand:CI 1 "general_operand" " w,w,o"))]
8175 && (register_operand (operands[0], CImode)
8176 || register_operand (operands[1], CImode))"
8178 [(set_attr "type" "multiple")
8179 (set_attr "arch" "simd,*,*")
8180 (set_attr "length" "12,8,8")]
8183 (define_insn "*aarch64_mov<mode>"
8184 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
8185 (match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
8187 && (register_operand (operands[0], <MODE>mode)
8188 || register_operand (operands[1], <MODE>mode))"
8190 [(set_attr "type" "multiple")
8191 (set_attr "arch" "fp<q>,*,*")
8192 (set_attr "length" "16,8,8")]
8195 (define_insn "*aarch64_movxi"
8196 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
8197 (match_operand:XI 1 "general_operand" " w,w,o"))]
8199 && (register_operand (operands[0], XImode)
8200 || register_operand (operands[1], XImode))"
8202 [(set_attr "type" "multiple")
8203 (set_attr "arch" "simd,*,*")
8204 (set_attr "length" "16,8,8")]
8208 [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
8209 (match_operand:VSTRUCT_2QD 1 "register_operand"))]
8210 "TARGET_FLOAT && reload_completed"
8213 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
8218 [(set (match_operand:OI 0 "register_operand")
8219 (match_operand:OI 1 "register_operand"))]
8220 "TARGET_FLOAT && reload_completed"
8223 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
8228 [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
8229 (match_operand:VSTRUCT_3QD 1 "general_operand"))]
8230 "TARGET_FLOAT && reload_completed"
8233 if (register_operand (operands[0], <MODE>mode)
8234 && register_operand (operands[1], <MODE>mode))
8235 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
8238 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8239 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8240 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8242 simplify_gen_subreg (pair_mode, operands[1],
8244 emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
8245 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8249 gen_lowpart (<VSTRUCT_ELT>mode,
8250 simplify_gen_subreg (<VSTRUCT_ELT>mode,
8259 [(set (match_operand:CI 0 "nonimmediate_operand")
8260 (match_operand:CI 1 "general_operand"))]
8261 "TARGET_FLOAT && reload_completed"
8264 if (register_operand (operands[0], CImode)
8265 && register_operand (operands[1], CImode))
8266 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
8269 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
8270 simplify_gen_subreg (OImode, operands[1], CImode, 0));
8271 emit_move_insn (gen_lowpart (V16QImode,
8272 simplify_gen_subreg (TImode, operands[0],
8274 gen_lowpart (V16QImode,
8275 simplify_gen_subreg (TImode, operands[1],
8282 [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
8283 (match_operand:VSTRUCT_4QD 1 "general_operand"))]
8284 "TARGET_FLOAT && reload_completed"
8287 if (register_operand (operands[0], <MODE>mode)
8288 && register_operand (operands[1], <MODE>mode))
8289 aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
8292 int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8293 machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8294 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8296 simplify_gen_subreg (pair_mode, operands[1],
8298 emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8299 <MODE>mode, 2 * elt_size),
8300 simplify_gen_subreg (pair_mode, operands[1],
8301 <MODE>mode, 2 * elt_size));
8307 [(set (match_operand:XI 0 "nonimmediate_operand")
8308 (match_operand:XI 1 "general_operand"))]
8309 "TARGET_FLOAT && reload_completed"
8312 if (register_operand (operands[0], XImode)
8313 && register_operand (operands[1], XImode))
8314 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
8317 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
8318 simplify_gen_subreg (OImode, operands[1], XImode, 0));
8319 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
8320 simplify_gen_subreg (OImode, operands[1], XImode, 32));
8326 [(set (match_operand:V8DI 0 "nonimmediate_operand")
8327 (match_operand:V8DI 1 "general_operand"))]
8331 if (register_operand (operands[0], V8DImode)
8332 && register_operand (operands[1], V8DImode))
8334 aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
8337 else if ((register_operand (operands[0], V8DImode)
8338 && memory_operand (operands[1], V8DImode))
8339 || (memory_operand (operands[0], V8DImode)
8340 && register_operand (operands[1], V8DImode)))
8342 /* V8DI only guarantees 8-byte alignment, whereas TImode requires 16. */
8343 auto mode = STRICT_ALIGNMENT ? DImode : TImode;
8344 int increment = GET_MODE_SIZE (mode);
8345 std::pair<rtx, rtx> last_pair = {};
8346 for (int offset = 0; offset < 64; offset += increment)
8348 std::pair<rtx, rtx> pair = {
8349 simplify_gen_subreg (mode, operands[0], V8DImode, offset),
8350 simplify_gen_subreg (mode, operands[1], V8DImode, offset)
8352 if (register_operand (pair.first, mode)
8353 && reg_overlap_mentioned_p (pair.first, pair.second))
8356 emit_move_insn (pair.first, pair.second);
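/* A piece whose destination register overlaps the source (for
   instance the base address of the load) is deferred into last_pair
   and emitted last, so the overlapping value is not clobbered while
   it is still needed.  */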
8358 if (last_pair.first)
8359 emit_move_insn (last_pair.first, last_pair.second);
8366 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
8367 [(match_operand:VSTRUCT_QD 0 "register_operand")
8368 (match_operand:DI 1 "register_operand")]
8371 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8372 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8374 emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
8378 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8379 [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
8380 (unspec:VSTRUCT_2DNX [
8381 (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
8384 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8385 [(set_attr "type" "neon_load2_2reg<q>")]
8388 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8389 [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
8390 (unspec:VSTRUCT_2DX [
8391 (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
8394 "ld1\\t{%S0.1d - %T0.1d}, %1"
8395 [(set_attr "type" "neon_load1_2reg<q>")]
8398 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8399 [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
8400 (unspec:VSTRUCT_3DNX [
8401 (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
8404 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8405 [(set_attr "type" "neon_load3_3reg<q>")]
8408 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8409 [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
8410 (unspec:VSTRUCT_3DX [
8411 (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
8414 "ld1\\t{%S0.1d - %U0.1d}, %1"
8415 [(set_attr "type" "neon_load1_3reg<q>")]
8418 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8419 [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
8420 (unspec:VSTRUCT_4DNX [
8421 (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
8424 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8425 [(set_attr "type" "neon_load4_4reg<q>")]
8428 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8429 [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
8430 (unspec:VSTRUCT_4DX [
8431 (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
8434 "ld1\\t{%S0.1d - %V0.1d}, %1"
8435 [(set_attr "type" "neon_load1_4reg<q>")]
8438 (define_expand "@aarch64_ld<nregs><vstruct_elt>"
8439 [(match_operand:VSTRUCT_D 0 "register_operand")
8440 (match_operand:DI 1 "register_operand")]
8443 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8444 emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
8448 (define_expand "@aarch64_ld1<VALL_F16:mode>"
8449 [(match_operand:VALL_F16 0 "register_operand")
8450 (match_operand:DI 1 "register_operand")]
8453 machine_mode mode = <VALL_F16:MODE>mode;
8454 rtx mem = gen_rtx_MEM (mode, operands[1]);
8456 if (BYTES_BIG_ENDIAN)
8457 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
8459 emit_move_insn (operands[0], mem);
8463 (define_expand "@aarch64_ld<nregs><vstruct_elt>"
8464 [(match_operand:VSTRUCT_Q 0 "register_operand")
8465 (match_operand:DI 1 "register_operand")]
8468 rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8469 emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
8473 (define_expand "@aarch64_ld1x2<vstruct_elt>"
8474 [(match_operand:VSTRUCT_2QD 0 "register_operand")
8475 (match_operand:DI 1 "register_operand")]
8478 machine_mode mode = <MODE>mode;
8479 rtx mem = gen_rtx_MEM (mode, operands[1]);
8481 emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
8485 (define_expand "@aarch64_ld<nregs>_lane<vstruct_elt>"
8486 [(match_operand:VSTRUCT_QD 0 "register_operand")
8487 (match_operand:DI 1 "register_operand")
8488 (match_operand:VSTRUCT_QD 2 "register_operand")
8489 (match_operand:SI 3 "immediate_operand")]
8492 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8493 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8495 aarch64_simd_lane_bounds (operands[3], 0,
8496 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8497 emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
8498 mem, operands[2], operands[3]));
8502 ;; Permuted-store expanders for neon intrinsics.
8504 ;; Permute instructions
8508 (define_expand "vec_perm<mode>"
8509 [(match_operand:VB 0 "register_operand")
8510 (match_operand:VB 1 "register_operand")
8511 (match_operand:VB 2 "register_operand")
8512 (match_operand:VB 3 "register_operand")]
8515 aarch64_expand_vec_perm (operands[0], operands[1],
8516 operands[2], operands[3], <nunits>);
8520 (define_insn "aarch64_qtbl1<mode>"
8521 [(set (match_operand:VB 0 "register_operand" "=w")
8522 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
8523 (match_operand:VB 2 "register_operand" "w")]
8526 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
8527 [(set_attr "type" "neon_tbl1<q>")]
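;; TBL sketch (illustrative): every byte of the index operand selects a
;; byte from the 16-byte table register, and out-of-range indices give
;; 0.  In C this is e.g. vqtbl1_u8 (table, idx) from arm_neon.h.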
8530 (define_insn "aarch64_qtbx1<mode>"
8531 [(set (match_operand:VB 0 "register_operand" "=w")
8532 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8533 (match_operand:V16QI 2 "register_operand" "w")
8534 (match_operand:VB 3 "register_operand" "w")]
8537 "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
8538 [(set_attr "type" "neon_tbl1<q>")]
8541 ;; Two source registers.
8543 (define_insn "aarch64_qtbl2<mode>"
8544 [(set (match_operand:VB 0 "register_operand" "=w")
8545 (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
8546 (match_operand:VB 2 "register_operand" "w")]
8549 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
8550 [(set_attr "type" "neon_tbl2")]
8553 (define_insn "aarch64_qtbx2<mode>"
8554 [(set (match_operand:VB 0 "register_operand" "=w")
8555 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8556 (match_operand:V2x16QI 2 "register_operand" "w")
8557 (match_operand:VB 3 "register_operand" "w")]
8560 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
8561 [(set_attr "type" "neon_tbl2")]
8564 ;; Three source registers.
8566 (define_insn "aarch64_qtbl3<mode>"
8567 [(set (match_operand:VB 0 "register_operand" "=w")
8568 (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
8569 (match_operand:VB 2 "register_operand" "w")]
8572 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
8573 [(set_attr "type" "neon_tbl3")]
8576 (define_insn "aarch64_qtbx3<mode>"
8577 [(set (match_operand:VB 0 "register_operand" "=w")
8578 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8579 (match_operand:V3x16QI 2 "register_operand" "w")
8580 (match_operand:VB 3 "register_operand" "w")]
8583 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8584 [(set_attr "type" "neon_tbl3")]
8587 ;; Four source registers.
8589 (define_insn "aarch64_qtbl4<mode>"
8590 [(set (match_operand:VB 0 "register_operand" "=w")
8591 (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
8592 (match_operand:VB 2 "register_operand" "w")]
8595 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8596 [(set_attr "type" "neon_tbl4")]
8599 (define_insn "aarch64_qtbx4<mode>"
8600 [(set (match_operand:VB 0 "register_operand" "=w")
8601 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8602 (match_operand:V4x16QI 2 "register_operand" "w")
8603 (match_operand:VB 3 "register_operand" "w")]
8606 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8607 [(set_attr "type" "neon_tbl4")]
8610 (define_insn_and_split "aarch64_combinev16qi"
8611 [(set (match_operand:V2x16QI 0 "register_operand" "=w")
8612 (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
8613 (match_operand:V16QI 2 "register_operand" "w")]
8620 aarch64_split_combinev16qi (operands);
8623 [(set_attr "type" "multiple")]
8626 ;; This instruction's pattern is generated directly by
8627 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8628 ;; need corresponding changes there.
8629 (define_insn "@aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
8630 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8631 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8632 (match_operand:VALL_F16 2 "register_operand" "w")]
8635 "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8636 [(set_attr "type" "neon_permute<q>")]
8639 ;; ZIP1 ignores the contents of the upper halves of the registers,
8640 ;; so we can describe 128-bit operations in terms of 64-bit inputs.
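;; E.g. (V4SI, illustrative): zip1 of a = { a0, a1, a2, a3 } and
;; b = { b0, b1, b2, b3 } yields { a0, b0, a1, b1 }, which reads only
;; the low 64 bits of each input.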
8641 (define_insn "aarch64_zip1<mode>_low"
8642 [(set (match_operand:VQ 0 "register_operand" "=w")
8643 (unspec:VQ [(match_operand:<VHALF> 1 "register_operand" "w")
8644 (match_operand:<VHALF> 2 "register_operand" "w")]
8647 "zip1\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8648 [(set_attr "type" "neon_permute_q")]
8651 ;; This instruction's pattern is generated directly by
8652 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8653 ;; need corresponding changes there. Note that the immediate (third)
8654 ;; operand is a lane index, not a byte index.
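;; E.g. (illustrative): for V4SI a lane index of 3 is printed as the
;; byte immediate 3 * 4 = 12, giving "ext v0.16b, v1.16b, v2.16b, #12".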
8655 (define_insn "@aarch64_ext<mode>"
8656 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8657 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8658 (match_operand:VALL_F16 2 "register_operand" "w")
8659 (match_operand:SI 3 "immediate_operand" "i")]
8663 operands[3] = GEN_INT (INTVAL (operands[3])
8664 * GET_MODE_UNIT_SIZE (<MODE>mode));
8665 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
8667 [(set_attr "type" "neon_ext<q>")]
8670 ;; This instruction's pattern is generated directly by
8671 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8672 ;; need corresponding changes there.
8673 (define_insn "@aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
8674 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8675 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
8678 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
8679 [(set_attr "type" "neon_rev<q>")]
8682 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8683 [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
8684 (unspec:VSTRUCT_2DNX [
8685 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
8688 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8689 [(set_attr "type" "neon_store2_2reg")]
8692 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8693 [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
8694 (unspec:VSTRUCT_2DX [
8695 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
8698 "st1\\t{%S1.1d - %T1.1d}, %0"
8699 [(set_attr "type" "neon_store1_2reg")]
8702 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8703 [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
8704 (unspec:VSTRUCT_3DNX [
8705 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
8708 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8709 [(set_attr "type" "neon_store3_3reg")]
8712 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8713 [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
8714 (unspec:VSTRUCT_3DX [
8715 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
8718 "st1\\t{%S1.1d - %U1.1d}, %0"
8719 [(set_attr "type" "neon_store1_3reg")]
8722 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8723 [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
8724 (unspec:VSTRUCT_4DNX [
8725 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
8728 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8729 [(set_attr "type" "neon_store4_4reg")]
8732 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8733 [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
8734 (unspec:VSTRUCT_4DX [
8735 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
8738 "st1\\t{%S1.1d - %V1.1d}, %0"
8739 [(set_attr "type" "neon_store1_4reg")]
8742 (define_expand "@aarch64_st<nregs><vstruct_elt>"
8743 [(match_operand:DI 0 "register_operand")
8744 (match_operand:VSTRUCT_D 1 "register_operand")]
8747 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8748 emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
8752 (define_expand "@aarch64_st<nregs><vstruct_elt>"
8753 [(match_operand:DI 0 "register_operand")
8754 (match_operand:VSTRUCT_Q 1 "register_operand")]
8757 rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8758 emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
8762 (define_expand "@aarch64_st<nregs>_lane<vstruct_elt>"
8763 [(match_operand:DI 0 "register_operand")
8764 (match_operand:VSTRUCT_QD 1 "register_operand")
8765 (match_operand:SI 2 "immediate_operand")]
8768 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
8769 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8771 aarch64_simd_lane_bounds (operands[2], 0,
8772 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8773 emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
8774 operands[1], operands[2]));
8778 (define_expand "@aarch64_st1<VALL_F16:mode>"
8779 [(match_operand:DI 0 "register_operand")
8780 (match_operand:VALL_F16 1 "register_operand")]
8783 machine_mode mode = <VALL_F16:MODE>mode;
8784 rtx mem = gen_rtx_MEM (mode, operands[0]);
8786 if (BYTES_BIG_ENDIAN)
8787 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
8789 emit_move_insn (mem, operands[1]);
8793 ;; Standard pattern name vec_init<mode><Vel>.
8795 (define_expand "vec_init<mode><Vel>"
8796 [(match_operand:VALL_F16 0 "register_operand")
8797 (match_operand 1 "" "")]
8800 aarch64_expand_vector_init (operands[0], operands[1]);
8804 (define_expand "vec_init<mode><Vhalf>"
8805 [(match_operand:VQ_NO2E 0 "register_operand")
8806 (match_operand 1 "" "")]
8809 aarch64_expand_vector_init (operands[0], operands[1]);
8813 (define_insn "*aarch64_simd_ld1r<mode>"
8814 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8815 (vec_duplicate:VALL_F16
8816 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
8818 "ld1r\\t{%0.<Vtype>}, %1"
8819 [(set_attr "type" "neon_load1_all_lanes")]
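;; E.g. (illustrative): "ld1r {v0.4s}, [x0]" loads one 32-bit element
;; and replicates it into all four lanes.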
8822 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
8823 [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
8824 (unspec:VSTRUCT_2QD [
8825 (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
8828 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8829 [(set_attr "type" "neon_load1_2reg<q>")]
8833 (define_insn "@aarch64_frecpe<mode>"
8834 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8836 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8839 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8840 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8843 (define_insn "aarch64_frecpx<mode>"
8844 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8845 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8848 "frecpx\t%<s>0, %<s>1"
8849 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8852 (define_insn "@aarch64_frecps<mode>"
8853 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8855 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8856 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8859 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8860 [(set_attr "type" "neon_fp_recps_<stype><q>")]
8863 (define_insn "aarch64_urecpe<mode>"
8864 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8865 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8868 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8869 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8871 ;; Standard pattern name vec_extract<mode><Vel>.
8873 (define_expand "vec_extract<mode><Vel>"
8874 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8875 (match_operand:VALL_F16 1 "register_operand")
8876 (match_operand:SI 2 "immediate_operand")]
8880 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8884 ;; Extract a 64-bit vector from one half of a 128-bit vector.
8885 (define_expand "vec_extract<mode><Vhalf>"
8886 [(match_operand:<VHALF> 0 "register_operand")
8887 (match_operand:VQMOV_NO2E 1 "register_operand")
8888 (match_operand 2 "immediate_operand")]
8891 int start = INTVAL (operands[2]);
8892 gcc_assert (start == 0 || start == 1);
8893 start *= <nunits> / 2;
8894 rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8895 emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8899 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8900 (define_expand "vec_extract<mode><V1half>"
8901 [(match_operand:<V1HALF> 0 "register_operand")
8902 (match_operand:VQ_2E 1 "register_operand")
8903 (match_operand 2 "immediate_operand")]
8906 /* V1DI and V1DF are rarely used by other patterns, so it is better
8907 to hide them behind a subreg destination of a normal DI or DF op.  */
8908 rtx scalar0 = gen_lowpart (<VHALF>mode, operands[0]);
8909 emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[1], operands[2]));
8915 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8916 [(set (match_operand:V16QI 0 "register_operand" "=w")
8919 (match_operand:V16QI 1 "register_operand" "%0")
8920 (match_operand:V16QI 2 "register_operand" "w"))]
8923 "aes<aes_op>\\t%0.16b, %2.16b"
8924 [(set_attr "type" "crypto_aese")]
8927 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8928 [(set (match_operand:V16QI 0 "register_operand" "=w")
8929 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8932 "aes<aesmc_op>\\t%0.16b, %1.16b"
8933 [(set_attr "type" "crypto_aesmc")]
8936 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8937 ;; and enforce the register dependency without scheduling or register
8938 ;; allocation messing up the order or introducing moves in between.
8939 ;; Mash the two together during combine.
8941 (define_insn "*aarch64_crypto_aese_fused"
8942 [(set (match_operand:V16QI 0 "register_operand" "=w")
8946 (match_operand:V16QI 1 "register_operand" "%0")
8947 (match_operand:V16QI 2 "register_operand" "w"))]
8951 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8952 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8953 [(set_attr "type" "crypto_aese")
8954 (set_attr "length" "8")]
8957 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8958 ;; and enforce the register dependency without scheduling or register
8959 ;; allocation messing up the order or introducing moves in between.
8960 ;; Mash the two together during combine.
8962 (define_insn "*aarch64_crypto_aesd_fused"
8963 [(set (match_operand:V16QI 0 "register_operand" "=w")
8967 (match_operand:V16QI 1 "register_operand" "%0")
8968 (match_operand:V16QI 2 "register_operand" "w"))]
8972 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8973 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8974 [(set_attr "type" "crypto_aese")
8975 (set_attr "length" "8")]
8980 (define_insn "aarch64_crypto_sha1hsi"
8981 [(set (match_operand:SI 0 "register_operand" "=w")
8982 (unspec:SI [(match_operand:SI 1
8983 "register_operand" "w")]
8987 [(set_attr "type" "crypto_sha1_fast")]
8990 (define_insn "aarch64_crypto_sha1hv4si"
8991 [(set (match_operand:SI 0 "register_operand" "=w")
8992 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8993 (parallel [(const_int 0)]))]
8995 "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8997 [(set_attr "type" "crypto_sha1_fast")]
9000 (define_insn "aarch64_be_crypto_sha1hv4si"
9001 [(set (match_operand:SI 0 "register_operand" "=w")
9002 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
9003 (parallel [(const_int 3)]))]
9005 "TARGET_SHA2 && BYTES_BIG_ENDIAN"
9007 [(set_attr "type" "crypto_sha1_fast")]
9010 (define_insn "aarch64_crypto_sha1su1v4si"
9011 [(set (match_operand:V4SI 0 "register_operand" "=w")
9012 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9013 (match_operand:V4SI 2 "register_operand" "w")]
9016 "sha1su1\\t%0.4s, %2.4s"
9017 [(set_attr "type" "crypto_sha1_fast")]
9020 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
9021 [(set (match_operand:V4SI 0 "register_operand" "=w")
9022 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9023 (match_operand:SI 2 "register_operand" "w")
9024 (match_operand:V4SI 3 "register_operand" "w")]
9027 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
9028 [(set_attr "type" "crypto_sha1_slow")]
9031 (define_insn "aarch64_crypto_sha1su0v4si"
9032 [(set (match_operand:V4SI 0 "register_operand" "=w")
9033 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9034 (match_operand:V4SI 2 "register_operand" "w")
9035 (match_operand:V4SI 3 "register_operand" "w")]
9038 "sha1su0\\t%0.4s, %2.4s, %3.4s"
9039 [(set_attr "type" "crypto_sha1_xor")]
9044 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
9045 [(set (match_operand:V4SI 0 "register_operand" "=w")
9046 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9047 (match_operand:V4SI 2 "register_operand" "w")
9048 (match_operand:V4SI 3 "register_operand" "w")]
9051 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
9052 [(set_attr "type" "crypto_sha256_slow")]
9055 (define_insn "aarch64_crypto_sha256su0v4si"
9056 [(set (match_operand:V4SI 0 "register_operand" "=w")
9057 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9058 (match_operand:V4SI 2 "register_operand" "w")]
9061 "sha256su0\\t%0.4s, %2.4s"
9062 [(set_attr "type" "crypto_sha256_fast")]
9065 (define_insn "aarch64_crypto_sha256su1v4si"
9066 [(set (match_operand:V4SI 0 "register_operand" "=w")
9067 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9068 (match_operand:V4SI 2 "register_operand" "w")
9069 (match_operand:V4SI 3 "register_operand" "w")]
9072 "sha256su1\\t%0.4s, %2.4s, %3.4s"
9073 [(set_attr "type" "crypto_sha256_slow")]
9078 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
9079 [(set (match_operand:V2DI 0 "register_operand" "=w")
9080 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9081 (match_operand:V2DI 2 "register_operand" "w")
9082 (match_operand:V2DI 3 "register_operand" "w")]
9085 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
9086 [(set_attr "type" "crypto_sha512")]
9089 (define_insn "aarch64_crypto_sha512su0qv2di"
9090 [(set (match_operand:V2DI 0 "register_operand" "=w")
9091 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9092 (match_operand:V2DI 2 "register_operand" "w")]
9095 "sha512su0\\t%0.2d, %2.2d"
9096 [(set_attr "type" "crypto_sha512")]
9099 (define_insn "aarch64_crypto_sha512su1qv2di"
9100 [(set (match_operand:V2DI 0 "register_operand" "=w")
9101 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9102 (match_operand:V2DI 2 "register_operand" "w")
9103 (match_operand:V2DI 3 "register_operand" "w")]
9106 "sha512su1\\t%0.2d, %2.2d, %3.2d"
9107 [(set_attr "type" "crypto_sha512")]
9112 (define_insn "eor3q<mode>4"
9113 [(set (match_operand:VQ_I 0 "register_operand" "=w")
9116 (match_operand:VQ_I 2 "register_operand" "w")
9117 (match_operand:VQ_I 3 "register_operand" "w"))
9118 (match_operand:VQ_I 1 "register_operand" "w")))]
9120 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
9121 [(set_attr "type" "crypto_sha3")]
9124 (define_insn "aarch64_rax1qv2di"
9125 [(set (match_operand:V2DI 0 "register_operand" "=w")
9128 (match_operand:V2DI 2 "register_operand" "w")
9130 (match_operand:V2DI 1 "register_operand" "w")))]
9132 "rax1\\t%0.2d, %1.2d, %2.2d"
9133 [(set_attr "type" "crypto_sha3")]
9136 (define_insn "*aarch64_xarqv2di_insn"
9137 [(set (match_operand:V2DI 0 "register_operand" "=w")
9140 (match_operand:V2DI 1 "register_operand" "%w")
9141 (match_operand:V2DI 2 "register_operand" "w"))
9142 (match_operand:V2DI 3 "aarch64_simd_lshift_imm" "Dl")))]
9146 = GEN_INT (64 - INTVAL (unwrap_const_vec_duplicate (operands[3])));
9147 return "xar\\t%0.2d, %1.2d, %2.2d, %3";
9149 [(set_attr "type" "crypto_sha3")]
9152 ;; The semantics of the vxarq_u64 intrinsics treat the immediate argument as a
9153 ;; right-rotate amount but the recommended representation of rotates by a
9154 ;; constant in RTL is with the left ROTATE code. Translate between the
9155 ;; intrinsic-provided amount and the RTL operands in the expander here.
9156 ;; The define_insn for XAR will translate back to instruction semantics in its
9157 ;; output logic.
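;; Worked example (illustrative): vxarq_u64 (a, b, 5), a right-rotate
;; by 5, is expanded to (rotate (xor a b) 59) since 64 - 5 = 59; the
;; insn printer above then converts 59 back to the architectural "#5".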
9158 (define_expand "aarch64_xarqv2di"
9159 [(set (match_operand:V2DI 0 "register_operand")
9162 (match_operand:V2DI 1 "register_operand")
9163 (match_operand:V2DI 2 "register_operand"))
9164 (match_operand:SI 3 "aarch64_simd_shift_imm_di")))]
9168 = aarch64_simd_gen_const_vector_dup (V2DImode,
9169 64 - INTVAL (operands[3]));
9173 (define_insn "bcaxq<mode>4"
9174 [(set (match_operand:VQ_I 0 "register_operand" "=w")
9177 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
9178 (match_operand:VQ_I 2 "register_operand" "w"))
9179 (match_operand:VQ_I 1 "register_operand" "w")))]
9181 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
9182 [(set_attr "type" "crypto_sha3")]
9187 (define_insn "aarch64_sm3ss1qv4si"
9188 [(set (match_operand:V4SI 0 "register_operand" "=w")
9189 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9190 (match_operand:V4SI 2 "register_operand" "w")
9191 (match_operand:V4SI 3 "register_operand" "w")]
9194 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
9195 [(set_attr "type" "crypto_sm3")]
9199 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
9200 [(set (match_operand:V4SI 0 "register_operand" "=w")
9201 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9202 (match_operand:V4SI 2 "register_operand" "w")
9203 (match_operand:V4SI 3 "register_operand" "w")
9204 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
9207 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
9208 [(set_attr "type" "crypto_sm3")]
9211 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
9212 [(set (match_operand:V4SI 0 "register_operand" "=w")
9213 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9214 (match_operand:V4SI 2 "register_operand" "w")
9215 (match_operand:V4SI 3 "register_operand" "w")]
9218 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
9219 [(set_attr "type" "crypto_sm3")]
9224 (define_insn "aarch64_sm4eqv4si"
9225 [(set (match_operand:V4SI 0 "register_operand" "=w")
9226 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9227 (match_operand:V4SI 2 "register_operand" "w")]
9230 "sm4e\\t%0.4s, %2.4s"
9231 [(set_attr "type" "crypto_sm4")]
9234 (define_insn "aarch64_sm4ekeyqv4si"
9235 [(set (match_operand:V4SI 0 "register_operand" "=w")
9236 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9237 (match_operand:V4SI 2 "register_operand" "w")]
9240 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
9241 [(set_attr "type" "crypto_sm4")]
9246 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
9247 [(set (match_operand:VDQSF 0 "register_operand")
9249 [(match_operand:VDQSF 1 "register_operand")
9250 (match_operand:<VFMLA_W> 2 "register_operand")
9251 (match_operand:<VFMLA_W> 3 "register_operand")]
9255 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9256 <nunits> * 2, false);
9257 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9258 <nunits> * 2, false);
9260 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
9269 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
9270 [(set (match_operand:VDQSF 0 "register_operand")
9272 [(match_operand:VDQSF 1 "register_operand")
9273 (match_operand:<VFMLA_W> 2 "register_operand")
9274 (match_operand:<VFMLA_W> 3 "register_operand")]
9278 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9279 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9281 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
9289 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
9290 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9293 (vec_select:<VFMLA_SEL_W>
9294 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9295 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
9297 (vec_select:<VFMLA_SEL_W>
9298 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9299 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9300 (match_operand:VDQSF 1 "register_operand" "0")))]
9302 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9303 [(set_attr "type" "neon_fp_mul_s")]
9306 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
9307 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9311 (vec_select:<VFMLA_SEL_W>
9312 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9313 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
9315 (vec_select:<VFMLA_SEL_W>
9316 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9317 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9318 (match_operand:VDQSF 1 "register_operand" "0")))]
9320 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9321 [(set_attr "type" "neon_fp_mul_s")]
9324 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
9325 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9328 (vec_select:<VFMLA_SEL_W>
9329 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9330 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
9332 (vec_select:<VFMLA_SEL_W>
9333 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9334 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9335 (match_operand:VDQSF 1 "register_operand" "0")))]
9337 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9338 [(set_attr "type" "neon_fp_mul_s")]
9341 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
9342 [(set (match_operand:VDQSF 0 "register_operand" "=w")
9346 (vec_select:<VFMLA_SEL_W>
9347 (match_operand:<VFMLA_W> 2 "register_operand" "w")
9348 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
9350 (vec_select:<VFMLA_SEL_W>
9351 (match_operand:<VFMLA_W> 3 "register_operand" "w")
9352 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9353 (match_operand:VDQSF 1 "register_operand" "0")))]
9355 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9356 [(set_attr "type" "neon_fp_mul_s")]
(define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
							  operands[1],
							  operands[2],
							  operands[3],
							  p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

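;; As an illustration (intrinsic names per ACLE, not taken from this
;; file), the lane forms multiply every selected element of operand 2 by
;; a single element of operand 3:
;;
;;   float32x2_t f (float32x2_t r, float16x4_t a, float16x4_t b)
;;   {
;;     return vfmlal_lane_low_f16 (r, a, b, 1); /* fmlal v0.2s, v1.2h, v2.h[1] */
;;   }
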
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
							     operands[1],
							     operands[2],
							     operands[3],
							     p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand")
	(unspec:V2SF [(match_operand:V2SF 1 "register_operand")
		      (match_operand:V4HF 2 "register_operand")
		      (match_operand:V8HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_lane_imm3")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (vec_select:V2HF
	   (match_operand:V4HF 2 "register_operand" "w")
	   (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	 (float_extend:V2SF
	  (neg:V2HF
	   (vec_select:V2HF
	    (match_operand:V4HF 2 "register_operand" "w")
	    (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V2SF
	  (vec_duplicate:V2HF
	   (vec_select:HF
	    (match_operand:V8HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	 (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand")
		      (match_operand:V8HF 2 "register_operand")
		      (match_operand:V4HF 3 "register_operand")
		      (match_operand:SI 4 "aarch64_imm2")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (vec_select:V4HF
	   (match_operand:V8HF 2 "register_operand" "w")
	   (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	 (float_extend:V4SF
	  (neg:V4HF
	   (vec_select:V4HF
	    (match_operand:V8HF 2 "register_operand" "w")
	    (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:V4SF
	  (vec_duplicate:V4HF
	   (vec_select:HF
	    (match_operand:V4HF 3 "register_operand" "x")
	    (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	 (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:DI 1 "register_operand" "w")
		    (match_operand:DI 2 "register_operand" "w")]
		   UNSPEC_PMULL))]
  "TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
		   UNSPEC_PMULL2))]
  "TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)

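;; Note for illustration (not from the original comments): pmull is a
;; carry-less (polynomial) multiply, the building block of GHASH- and
;; CRC-style code.  Roughly, via the ACLE intrinsics:
;;
;;   poly128_t f (poly64_t a, poly64_t b)
;;   {
;;     return vmull_p64 (a, b);   /* pmull v0.1q, v0.1d, v1.1d */
;;   }
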
;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
(define_insn_and_split "<optab><Vnarrowq><mode>2"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
  "&& <CODE> == ZERO_EXTEND
   && aarch64_split_simd_shift_p (insn)"
  [(const_int 0)]
  {
    /* On many cores, it is cheaper to implement UXTL using a ZIP1 with zero,
       provided that the cost of the zero can be amortized over several
       operations.  We'll later recombine the zero and zip if there are
       not sufficient uses of the zero to make the split worthwhile.  */
    rtx res = simplify_gen_subreg (<VNARROWQ2>mode, operands[0],
				   <MODE>mode, 0);
    rtx zero = aarch64_gen_shareable_zero (<VNARROWQ>mode);
    emit_insn (gen_aarch64_zip1<Vnarrowq2>_low (res, operands[1], zero));
    DONE;
  }
  [(set_attr "type" "neon_shift_imm_long")]
)

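;; An illustrative example of the equivalence used above (little-endian,
;; not part of the original comments): zipping a vector with zero
;; interleaves each element with a zero element, which is exactly a
;; zero-extension of the low half, e.g. for bytes:
;;
;;   uxtl v0.8h, v1.8b        ==    movi v31.4s, #0
;;                                  zip1 v0.16b, v1.16b, v31.16b
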
(define_expand "aarch64_<su>xtl<mode>"
  [(set (match_operand:VQN 0 "register_operand" "=w")
	(ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  ""
)

;; Truncate a 128-bit integer vector to a 64-bit vector.
(define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "xtn\t%0.<Vntype>, %1.<Vtype>"
  [(set_attr "type" "neon_move_narrow_q")]
)

;; Expander for the intrinsics that only take one mode, unlike the
;; two-mode trunc optab above.
(define_expand "aarch64_xtn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand")
	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
  "TARGET_SIMD"
)

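;; For illustration (mapping assumed from ACLE, not stated here): the
;; expander above underlies the vmovn intrinsics, which simply drop the
;; high half of each element:
;;
;;   int16x4_t f (int32x4_t a)
;;   {
;;     return vmovn_s32 (a);      /* xtn v0.4h, v0.4s */
;;   }
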
(define_insn "aarch64_bfdot<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
	   UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
  "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(plus:VDQSF
	  (unspec:VDQSF
	   [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
	    (match_operand:VBF 3 "register_operand" "w")
	    (match_operand:SI 4 "const_int_operand" "n")]
	   UNSPEC_BFDOT)
	  (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_BF16_SIMD"
{
  int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
  int lane = INTVAL (operands[4]);
  operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
  return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
}
  [(set_attr "type" "neon_dot<VDQSF:q>")]
)

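;; Illustrative semantics (not from the original comments): bfdot forms
;; a two-way dot product per SFmode lane, with the bfloat16 products
;; computed in single precision:
;;
;;   r[i] += (float) a[2*i]     * (float) b[2*i]
;;         + (float) a[2*i + 1] * (float) b[2*i + 1];
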
(define_insn "aarch64_bfmmlaqv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
		   (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				 (match_operand:V8BF 3 "register_operand" "w")]
		    UNSPEC_BFMMLA)))]
  "TARGET_BF16_SIMD"
  "bfmmla\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

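;; As a rough illustration (architectural semantics, not from this
;; file): bfmmla treats operand 2 as a 2x4 matrix A and operand 3 as the
;; transpose of a 4x2 matrix B, both of bfloat16 values, accumulating
;; A * B into the 2x2 single-precision matrix held row-major in the
;; destination:
;;
;;   for (i = 0; i < 2; i++)
;;     for (j = 0; j < 2; j++)
;;       for (k = 0; k < 4; k++)
;;         d[2*i + j] += (float) a[4*i + k] * (float) b[4*j + k];
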
(define_insn "aarch64_bfmlal<bt>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
		    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				  (match_operand:V8BF 3 "register_operand" "w")]
		     UNSPEC_BFMLAL<bt>)))]
  "TARGET_BF16_SIMD"
  "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
  [(set_attr "type" "neon_fp_mla_s_q")]
)

(define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
		    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
				  (match_operand:VBF 3 "register_operand" "x")
				  (match_operand:SI 4 "const_int_operand" "n")]
		     UNSPEC_BFMLAL<bt>)))]
  "TARGET_BF16_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
  return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
}
  [(set_attr "type" "neon_fp_mla_s_scalar_q")]
)

;; 8-bit integer matrix multiply-accumulate
(define_insn "aarch64_simd_<sur>mmlav16qi"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(plus:V4SI
	 (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
		       (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
	 (match_operand:V4SI 1 "register_operand" "0")))]
  "TARGET_I8MM"
  "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
  [(set_attr "type" "neon_mla_s_q")]
)

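;; Rough illustration via the ACLE intrinsics (mapping assumed, not
;; stated here): smmla/ummla/usmmla compute a 2x2 int32 matrix update
;; C += A(2x8) * B(8x2) from 8-bit elements, e.g.:
;;
;;   int32x4_t f (int32x4_t c, int8x16_t a, int8x16_t b)
;;   {
;;     return vmmlaq_s32 (c, a, b);   /* smmla v0.4s, v1.16b, v2.16b */
;;   }
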
(define_insn "aarch64_bfcvtn<q><mode>"
  [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
	(unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
			    UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "bfcvtn\\t%0.4h, %1.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtn2v8bf"
  [(set (match_operand:V8BF 0 "register_operand" "=w")
	(unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
		      (match_operand:V4SF 2 "register_operand" "w")]
		      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "bfcvtn2\\t%0.8h, %2.4s"
  [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
)

(define_insn "aarch64_bfcvtbf"
  [(set (match_operand:BF 0 "register_operand" "=w")
	(unspec:BF [(match_operand:SF 1 "register_operand" "w")]
		    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "bfcvt\\t%h0, %s1"
  [(set_attr "type" "f_cvt")]
)

;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
(define_insn "aarch64_vbfcvt<mode>"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN))]
  "TARGET_BF16_SIMD"
  "shll\\t%0.4s, %1.4h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_vbfcvt_highv8bf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
		      UNSPEC_BFCVTN2))]
  "TARGET_BF16_SIMD"
  "shll2\\t%0.4s, %1.8h, #16"
  [(set_attr "type" "neon_shift_imm_long")]
)

(define_insn "aarch64_bfcvtsf"
  [(set (match_operand:SF 0 "register_operand" "=w")
	(unspec:SF [(match_operand:BF 1 "register_operand" "w")]
		    UNSPEC_BFCVT))]
  "TARGET_BF16_FP"
  "shl\\t%d0, %d1, #16"
  [(set_attr "type" "neon_shift_imm")]
)

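;; Why a plain shift works (illustration, not from the original
;; comments): bfloat16 is the high half of an IEEE binary32, so widening
;; BF -> SF just places the 16 payload bits at the top of the word:
;;
;;   float bf16_to_f32 (uint16_t bf)
;;   {
;;     uint32_t bits = (uint32_t) bf << 16;
;;     float f;
;;     memcpy (&f, &bits, sizeof f);
;;     return f;
;;   }
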
(define_insn "@aarch64_<faminmax_uns_op><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:VHSDF 2 "register_operand" "w")]
		      FAMINMAX_UNS))]
  "TARGET_FAMINMAX"
  "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
)

(define_insn "*aarch64_faminmax_fused"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(FMAXMIN:VHSDF
	 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
	 (abs:VHSDF (match_operand:VHSDF 2 "register_operand" "w"))))]
  "TARGET_FAMINMAX"
  "<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
)

(define_insn "@aarch64_lut<VLUT:mode><VB:mode>"
  [(set (match_operand:<VLUT:VCONQ> 0 "register_operand" "=w")
	(unspec:<VLUT:VCONQ>
	  [(match_operand:VLUT 1 "register_operand" "w")
	   (match_operand:VB 2 "register_operand" "w")
	   (match_operand:SI 3 "const_int_operand")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPEC_LUTI))]
  "TARGET_LUT && INTVAL (operands[4]) <= exact_log2 (<VLUT:nunits>)"
  "luti%4\t%0<VLUT:Vconqtype>, {%1<VLUT:Vconqtype>}, %2[%3]"
)

(define_insn "@aarch64_lut<VLUTx2:mode><VB:mode>"
  [(set (match_operand:<VSTRUCT_ELT> 0 "register_operand" "=w")
	(unspec:<VSTRUCT_ELT>
	  [(match_operand:VLUTx2 1 "register_operand" "w")
	   (match_operand:VB 2 "register_operand" "w")
	   (match_operand:SI 3 "const_int_operand")
	   (match_operand:SI 4 "const_int_operand")]
	  UNSPEC_LUTI))]
  "TARGET_LUT && INTVAL (operands[4]) == 4"
  "luti%4\t%0.8h, {%S1.8h, %T1.8h}, %2[%3]"
)

;; fpm unary instructions (low part).
(define_insn "@aarch64_<insn><mode>"
  [(set (match_operand:VQ_BHF 0 "register_operand" "=w")
	(unspec:VQ_BHF
	  [(match_operand:V8QI 1 "register_operand" "w")
	   (reg:DI FPM_REGNUM)]
	  FPM_UNARY_UNS))]
  "TARGET_FP8"
  "<b><insn>\t%0.<Vtype>, %1.8b"
)

;; fpm unary instructions (high part).
(define_insn "@aarch64_<insn><mode>_high"
  [(set (match_operand:VQ_BHF 0 "register_operand" "=w")
	(unspec:VQ_BHF
	  [(vec_select:V8QI
	     (match_operand:V16QI 1 "register_operand" "w")
	     (match_operand:V16QI 2 "vect_par_cnst_hi_half"))
	   (reg:DI FPM_REGNUM)]
	  FPM_UNARY_UNS))]
  "TARGET_FP8"
  "<b><insn>2\t%0.<Vtype>, %1.16b"
)

;; fpm binary instructions.
(define_insn "@aarch64_<insn><mode>"
  [(set (match_operand:<VPACKB> 0 "register_operand" "=w")
	(unspec:<VPACKB>
	  [(match_operand:VCVTFPM 1 "register_operand" "w")
	   (match_operand:VCVTFPM 2 "register_operand" "w")
	   (reg:DI FPM_REGNUM)]
	  UNSPEC_FCVTN))]
  "TARGET_FP8"
  "<insn>\t%0.<VPACKBtype>, %1.<Vtype>, %2.<Vtype>"
)

10108 ;; fpm binary instructions & merge with low.
10109 (define_insn "@aarch64_<insn><mode>_high_le"
10110 [(set (match_operand:V16QI 0 "register_operand" "=w")
10112 (match_operand:V8QI 1 "register_operand" "0")
10114 [(match_operand:V4SF_ONLY 2 "register_operand" "w")
10115 (match_operand:V4SF_ONLY 3 "register_operand" "w")
10116 (reg:DI FPM_REGNUM)]
10118 "TARGET_FP8 && !BYTES_BIG_ENDIAN"
10119 "<insn>2\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
10122 (define_insn "@aarch64_<insn><mode>_high_be"
10123 [(set (match_operand:V16QI 0 "register_operand" "=w")
10126 [(match_operand:V4SF_ONLY 2 "register_operand" "w")
10127 (match_operand:V4SF_ONLY 3 "register_operand" "w")
10128 (reg:DI FPM_REGNUM)]
10130 (match_operand:V8QI 1 "register_operand" "0")))]
10131 "TARGET_FP8 && BYTES_BIG_ENDIAN"
10132 "<insn>2\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
;; fscale instructions
(define_insn "@aarch64_<insn><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
		       (match_operand:<FCVT_TARGET> 2 "register_operand" "w")]
		      UNSPEC_FSCALE))]
  "TARGET_FP8"
  "<insn>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
)

;; fpm vdot instructions.  The target requirements are enforced by
;; the mode iterator.
(define_insn "@aarch64_<insn><mode>"
  [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
	(unspec:VDQ_HSF_FDOT
	  [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
	   (match_operand:<VNARROWB> 2 "register_operand" "w")
	   (match_operand:<VNARROWB> 3 "register_operand" "w")
	   (reg:DI FPM_REGNUM)]
	  UNSPEC_FDOT))]
  ""
  "<insn>\t%1.<Vtype>, %2.<Vnbtype>, %3.<Vnbtype>"
)

(define_insn "@aarch64_<insn>_lane<VDQ_HSF_FDOT:mode><VB:mode>"
  [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
	(unspec:VDQ_HSF_FDOT
	  [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
	   (match_operand:<VDQ_HSF_FDOT:VNARROWB> 2 "register_operand" "w")
	   (match_operand:VB 3 "register_operand" "w")
	   (match_operand 4 "const_int_operand")
	   (reg:DI FPM_REGNUM)]
	  UNSPEC_FDOT_LANE))]
  ""
  "<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>, %3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]"
)

;; fpm fma instructions.
(define_insn "@aarch64_<insn><mode>"
  [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
	(unspec:V8HF_ONLY
	  [(match_operand:V8HF_ONLY 1 "register_operand" "0")
	   (match_operand:V16QI 2 "register_operand" "w")
	   (match_operand:V16QI 3 "register_operand" "w")
	   (reg:DI FPM_REGNUM)]
	  FMLAL_FP8))]
  "TARGET_FP8FMA"
  "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
)

(define_insn "@aarch64_<insn>_lane<V8HF_ONLY:mode><VB:mode>"
  [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
	(unspec:V8HF_ONLY
	  [(match_operand:V8HF_ONLY 1 "register_operand" "0")
	   (match_operand:V16QI 2 "register_operand" "w")
	   (vec_duplicate:V16QI
	     (vec_select:QI
	       (match_operand:VB 3 "register_operand" "w")
	       (parallel [(match_operand:SI 4 "immediate_operand")])))
	   (reg:DI FPM_REGNUM)]
	  FMLAL_FP8))]
  "TARGET_FP8FMA"
{
  operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
					 INTVAL (operands[4]));
  return "<insn>\t%0.<V8HF_ONLY:Vtype>, %2.16b, %3.b[%4]";
}
)

(define_insn "@aarch64_<insn><mode>"
  [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
	(unspec:V4SF_ONLY
	  [(match_operand:V4SF_ONLY 1 "register_operand" "0")
	   (match_operand:V16QI 2 "register_operand" "w")
	   (match_operand:V16QI 3 "register_operand" "w")
	   (reg:DI FPM_REGNUM)]
	  FMLALL_FP8))]
  "TARGET_FP8FMA"
  "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
)

(define_insn "@aarch64_<insn>_lane<V4SF_ONLY:mode><VB:mode>"
  [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
	(unspec:V4SF_ONLY
	  [(match_operand:V4SF_ONLY 1 "register_operand" "0")
	   (match_operand:V16QI 2 "register_operand" "w")
	   (vec_duplicate:V16QI
	     (vec_select:QI
	       (match_operand:VB 3 "register_operand" "w")
	       (parallel [(match_operand:SI 4 "immediate_operand")])))
	   (reg:DI FPM_REGNUM)]
	  FMLALL_FP8))]
  "TARGET_FP8FMA"
{
  operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
					 INTVAL (operands[4]));
  return "<insn>\t%0.<V4SF_ONLY:Vtype>, %2.16b, %3.b[%4]";
}
)