Daily bump.
[official-gcc.git] / gcc / config / aarch64 / aarch64-simd.md
blobe2afe87e5130cc066b8348659209ab40747327e5
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3.  If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The following define_subst rules are used to produce patterns representing
22 ;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
23 ;; a vec_concat with zeroes.  The order of the vec_concat operands differs
24 ;; for big-endian so we have a separate define_subst rule for each endianness.
;; Little-endian variant: the data half (operand 1) is the low part of the
;; concatenation and the zero half (operand 2) is the high part.
25 (define_subst "add_vec_concat_subst_le"
26   [(set (match_operand:VDZ 0)
27         (match_operand:VDZ 1))]
28   "!BYTES_BIG_ENDIAN"
29   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
30         (vec_concat:<VDBL>
31          (match_dup 1)
32          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])
;; Big-endian variant: the vec_concat operand order is reversed, so the zero
;; half comes first.
34 (define_subst "add_vec_concat_subst_be"
35   [(set (match_operand:VDZ 0)
36         (match_operand:VDZ 1))]
37   "BYTES_BIG_ENDIAN"
38   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
39         (vec_concat:<VDBL>
40          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
41          (match_dup 1)))])
43 ;; The subst_attr definitions used to annotate patterns further in the file.
44 ;; Patterns that need to have the above substitutions added to them should
45 ;; have <vczle><vczbe> added to their name.
46 (define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
47 (define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
;; Vector move expander: legalizes the operands before the move insns below
;; match.
49 (define_expand "mov<mode>"
50   [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
51         (match_operand:VALL_F16 1 "general_operand"))]
52   "TARGET_FLOAT"
53   "
54   /* Force the operand into a register if it is not an
55      immediate whose use can be replaced with xzr.
56      If the mode is 16 bytes wide, then we will be doing
57      a stp in DI mode, so we check the validity of that.
58      If the mode is 8 bytes wide, then we will be doing a
59      normal str, so the check need not apply.  */
60   if (GET_CODE (operands[0]) == MEM
61       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
62            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
63                 && aarch64_mem_pair_operand (operands[0], DImode))
64                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
65       operands[1] = force_reg (<MODE>mode, operands[1]);
67   /* If a constant is too complex to force to memory (e.g. because it
68      contains CONST_POLY_INTs), build it up from individual elements instead.
69      We should only need to do this before RA; aarch64_legitimate_constant_p
70      should ensure that we don't try to rematerialize the constant later.  */
71   if (GET_CODE (operands[1]) == CONST_VECTOR
72       && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
73     {
74       aarch64_expand_vector_init (operands[0], operands[1]);
75       DONE;
76     }
77   "
;; Misaligned vector move: only available when the target permits unaligned
;; accesses (!STRICT_ALIGNMENT).
80 (define_expand "movmisalign<mode>"
81   [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
82         (match_operand:VALL_F16 1 "general_operand"))]
83   "TARGET_FLOAT && !STRICT_ALIGNMENT"
85   /* This pattern is not permitted to fail during expansion: if both arguments
86      are non-registers (e.g. memory := constant, which can be created by the
87      auto-vectorizer), force operand 1 into a register.  */
88   if (!register_operand (operands[0], <MODE>mode)
89       && !register_operand (operands[1], <MODE>mode))
90     operands[1] = force_reg (<MODE>mode, operands[1]);
;; Duplicate a scalar (SIMD or, at a cost, general-purpose register) into
;; every lane of an integer vector.
93 (define_insn "aarch64_simd_dup<mode>"
94   [(set (match_operand:VDQ_I 0 "register_operand")
95         (vec_duplicate:VDQ_I
96           (match_operand:<VEL> 1 "register_operand")))]
97   "TARGET_SIMD"
98   {@ [ cons: =0 , 1  ; attrs: type      ]
99      [ w        , w  ; neon_dup<q>      ] dup\t%0.<Vtype>, %1.<Vetype>[0]
100      [ w        , ?r ; neon_from_gp<q>  ] dup\t%0.<Vtype>, %<vwcore>1
101   }
;; As above, but for floating-point element vectors.
104 (define_insn "aarch64_simd_dup<mode>"
105   [(set (match_operand:VDQF_F16 0 "register_operand")
106         (vec_duplicate:VDQF_F16
107           (match_operand:<VEL> 1 "register_operand")))]
108   "TARGET_SIMD"
109   {@ [ cons: =0 , 1 ; attrs: type      ]
110      [ w        , w ; neon_dup<q>      ] dup\t%0.<Vtype>, %1.<Vetype>[0]
111      [ w        , r ; neon_from_gp<q>  ] dup\t%0.<Vtype>, %<vwcore>1
112   }
;; Broadcast a single lane of a vector into all lanes of the destination.
;; The lane number is remapped for endianness at output time.
115 (define_insn "@aarch64_dup_lane<mode>"
116   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
117         (vec_duplicate:VALL_F16
118           (vec_select:<VEL>
119             (match_operand:VALL_F16 1 "register_operand" "w")
120             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
121           )))]
122   "TARGET_SIMD"
123   {
124     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
125     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
126   }
127   [(set_attr "type" "neon_dup<q>")]
;; As above, but the source vector has the opposite width (64 <-> 128 bit)
;; from the destination, hence the <VSWAP_WIDTH> source mode.
130 (define_insn "@aarch64_dup_lane_<vswap_width_name><mode>"
131   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
132         (vec_duplicate:VALL_F16_NO_V2Q
133           (vec_select:<VEL>
134             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
135             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
136           )))]
137   "TARGET_SIMD"
138   {
139     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
140     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
141   }
142   [(set_attr "type" "neon_dup<q>")]
;; 64-bit vector moves between registers, memory and immediates.
145 (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
146   [(set (match_operand:VDMOV 0 "nonimmediate_operand")
147         (match_operand:VDMOV 1 "general_operand"))]
148   "TARGET_FLOAT
149    && (register_operand (operands[0], <MODE>mode)
150        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
151   {@ [cons: =0, 1; attrs: type, arch, length]
152      [w , m ; neon_load1_1reg<q> , *        , *] ldr\t%d0, %1
153      [r , m ; load_8             , *        , *] ldr\t%x0, %1
154      [m , Dz; store_8            , *        , *] str\txzr, %0
155      [m , w ; neon_store1_1reg<q>, *        , *] str\t%d1, %0
156      [m , r ; store_8            , *        , *] str\t%x1, %0
157      [w , w ; neon_logic<q>      , simd     , *] mov\t%0.<Vbtype>, %1.<Vbtype>
158      [w , w ; neon_logic<q>      , *        , *] fmov\t%d0, %d1
159      [?r, w ; neon_to_gp<q>      , base_simd, *] umov\t%0, %1.d[0]
160      [?r, w ; neon_to_gp<q>      , *        , *] fmov\t%x0, %d1
161      [?w, r ; f_mcr              , *        , *] fmov\t%d0, %1
162      [?r, r ; mov_reg            , *        , *] mov\t%0, %1
163      [w , Dn; neon_move<q>       , simd     , *] << aarch64_output_simd_mov_imm (operands[1], 64);
164      [w , Dz; f_mcr              , *        , *] fmov\t%d0, xzr
165      [w , Dx; neon_move          , simd     , 8] #
166   }
;; Split: special integer constants (the Dx alternative) destined for an FP
;; register are materialized via aarch64_maybe_generate_simd_constant.
167   "CONST_INT_P (operands[1])
168    && aarch64_simd_special_constant_p (operands[1], <MODE>mode)
169    && FP_REGNUM_P (REGNO (operands[0]))"
170   [(const_int 0)]
171   {
172     aarch64_maybe_generate_simd_constant (operands[0], operands[1], <MODE>mode);
173     DONE;
174   }
;; 128-bit vector moves.  Register moves that cross between the GP and FP
;; register files (and GP<->GP moves) take two instructions and are split
;; after reload.
177 (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
178   [(set (match_operand:VQMOV 0 "nonimmediate_operand")
179         (match_operand:VQMOV 1 "general_operand"))]
180   "TARGET_FLOAT
181    && (register_operand (operands[0], <MODE>mode)
182        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
183   {@ [cons: =0, 1; attrs: type, arch, length]
184      [w  , m ; neon_load1_1reg<q> , *   , 4] ldr\t%q0, %1
185      [Umn, Dz; store_16           , *   , 4] stp\txzr, xzr, %0
186      [m  , w ; neon_store1_1reg<q>, *   , 4] str\t%q1, %0
187      [w  , w ; neon_logic<q>      , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
188      [w  , w ; *                  , sve , 4] mov\t%Z0.d, %Z1.d
189      [?r , w ; multiple           , *   , 8] #
190      [?w , r ; multiple           , *   , 8] #
191      [?r , r ; multiple           , *   , 8] #
192      [w  , Dn; neon_move<q>       , simd, 4] << aarch64_output_simd_mov_imm (operands[1], 128);
193      [w  , Dz; fmov               , *   , 4] fmov\t%d0, xzr
194      [w  , Dx; neon_move          , simd, 8] #
195   }
;; Split after reload: either a reg-reg move not wholly within the FP
;; register file, or a special constant destined for an FP register.
196   "&& reload_completed
197    && ((REG_P (operands[0])
198         && REG_P (operands[1])
199         && !(FP_REGNUM_P (REGNO (operands[0]))
200              && FP_REGNUM_P (REGNO (operands[1]))))
201        || (aarch64_simd_special_constant_p (operands[1], <MODE>mode)
202            && FP_REGNUM_P (REGNO (operands[0]))))"
203   [(const_int 0)]
204   {
205     if (GP_REGNUM_P (REGNO (operands[0]))
206         && GP_REGNUM_P (REGNO (operands[1])))
207       aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
208     else
209       {
210         if (FP_REGNUM_P (REGNO (operands[0]))
211             && aarch64_maybe_generate_simd_constant (operands[0], operands[1],
212                                                      <MODE>mode))
213           ;
214         else
215           aarch64_split_simd_move (operands[0], operands[1]);
216       }
217     DONE;
218   }
221 ;; When storing lane zero we can use the normal STR and its more permissive
222 ;; addressing modes.
224 (define_insn "aarch64_store_lane0<mode>"
225   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
226         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
227                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
228   "TARGET_FLOAT
229    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
230   "str\\t%<Vetype>1, %0"
231   [(set_attr "type" "neon_store1_1reg<q>")]
;; Store a duplicated scalar to a 2-element vector in memory as an STP of
;; the same register (or GP register) twice.
234 (define_insn "aarch64_simd_stp<mode>"
235   [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand")
236         (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand")))]
237   "TARGET_SIMD"
238   {@ [ cons: =0 , 1 ; attrs: type            ]
239      [ Umn      , w ; neon_stp               ] stp\t%<Vetype>1, %<Vetype>1, %y0
240      [ Umn      , r ; store_<ldpstp_vel_sz>  ] stp\t%<vwcore>1, %<vwcore>1, %y0
241   }
;; Split a 128-bit move into two 64-bit-half operations, in either
;; direction between the GP and SIMD register files.
244 (define_expand "@aarch64_split_simd_mov<mode>"
245   [(set (match_operand:VQMOV 0)
246         (match_operand:VQMOV 1))]
247   "TARGET_FLOAT"
248   {
249     rtx dst = operands[0];
250     rtx src = operands[1];
252     if (GP_REGNUM_P (REGNO (src)))
253       {
254         /* GP -> SIMD: move the low half, then combine with the high half.  */
254         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
255         rtx src_high_part = gen_highpart (<VHALF>mode, src);
256         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
258         emit_move_insn (dst_low_part, src_low_part);
259         emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
260                                                src_high_part));
261       }
262     else
263       {
264         /* SIMD -> GP: extract each half via aarch64_get_half.  */
264         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
265         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
266         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
267         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
268         emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
269         emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
270       }
271     DONE;
272   }
;; Extract one 64-bit half of a 128-bit vector.  The low half is just a
;; subreg move; the high half falls through to the insns below.
275 (define_expand "aarch64_get_half<mode>"
276   [(set (match_operand:<VHALF> 0 "register_operand")
277         (vec_select:<VHALF>
278           (match_operand:VQMOV 1 "register_operand")
279           (match_operand 2 "ascending_int_parallel")))]
280   "TARGET_FLOAT"
281   {
282     if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
283       {
284         emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
285         DONE;
286       }
287   }
;; Move the low 64-bit half of a 128-bit vector into a 64-bit register.
;; When the destination is a SIMD register this is split into a plain
;; subreg move after reload.
290 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
291   [(set (match_operand:<VHALF> 0 "register_operand")
292         (vec_select:<VHALF>
293           (match_operand:VQMOV_NO2E 1 "register_operand")
294           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half")))]
295   "TARGET_FLOAT"
296   {@ [ cons: =0 , 1 ; attrs: type   , arch      ]
297      [ w        , w ; mov_reg       , simd      ] #
298      [ ?r       , w ; neon_to_gp<q> , base_simd ] umov\t%0, %1.d[0]
299      [ ?r       , w ; f_mrc         , *         ] fmov\t%0, %d1
300   }
301   "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
302   [(set (match_dup 0) (match_dup 1))]
303   {
304     operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
305   }
306   [(set_attr "length" "4")]
;; Move the high 64-bit half of a 128-bit vector into a 64-bit register.
309 (define_insn "aarch64_simd_mov_from_<mode>high"
310   [(set (match_operand:<VHALF> 0 "register_operand")
311         (vec_select:<VHALF>
312           (match_operand:VQMOV_NO2E 1 "register_operand")
313           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half")))]
314   "TARGET_FLOAT"
315   {@ [ cons: =0 , 1 ; attrs: type   , arch  ]
316      [ w        , w ; neon_dup<q>   , simd  ] dup\t%d0, %1.d[1]
317      [ w        , w ; *             , sve   ] ext\t%Z0.b, %Z0.b, %Z0.b, #8
318      [ ?r       , w ; neon_to_gp<q> , simd  ] umov\t%0, %1.d[1]
319      [ ?r       , w ; f_mrc         , *     ] fmov\t%0, %1.d[1]
320   }
321   [(set_attr "length" "4")]
;; Basic integer vector operations.  The <vczle><vczbe> suffixes generate
;; the additional implicit-zeroing variants via the define_substs above.
;; OR with complement of the second operand (ORN).
324 (define_insn "iorn<mode>3<vczle><vczbe>"
325  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
326        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 "register_operand" "w"))
327                 (match_operand:VDQ_I 1 "register_operand" "w")))]
328  "TARGET_SIMD"
329  "orn\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
330   [(set_attr "type" "neon_logic<q>")]
;; AND with complement of the second operand (BIC).
333 (define_insn "andn<mode>3<vczle><vczbe>"
334  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
335        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 "register_operand" "w"))
336                 (match_operand:VDQ_I 1 "register_operand" "w")))]
337  "TARGET_SIMD"
338  "bic\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
339   [(set_attr "type" "neon_logic<q>")]
342 (define_insn "add<mode>3<vczle><vczbe>"
343   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
344         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
345                   (match_operand:VDQ_I 2 "register_operand" "w")))]
346   "TARGET_SIMD"
347   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
348   [(set_attr "type" "neon_add<q>")]
351 (define_insn "sub<mode>3<vczle><vczbe>"
352   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
353         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
354                    (match_operand:VDQ_I 2 "register_operand" "w")))]
355   "TARGET_SIMD"
356   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
357   [(set_attr "type" "neon_sub<q>")]
360 (define_insn "mul<mode>3<vczle><vczbe>"
361   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
362         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
363                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
364   "TARGET_SIMD"
365   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
366   [(set_attr "type" "neon_mul_<Vetype><q>")]
;; Byte-swap each element, implemented as a REV of the byte view.
369 (define_insn "bswap<mode>2"
370   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
371         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
372   "TARGET_SIMD"
373   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
374   [(set_attr "type" "neon_rev<q>")]
;; Bit-reverse within each byte element.
377 (define_insn "aarch64_rbit<mode><vczle><vczbe>"
378   [(set (match_operand:VB 0 "register_operand" "=w")
379         (bitreverse:VB (match_operand:VB 1 "register_operand" "w")))]
380   "TARGET_SIMD"
381   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
382   [(set_attr "type" "neon_rbit")]
;; ctz (x) == clz (bit-reverse (x)).  There is no vector bit-reverse for
;; 32-bit elements, so compose it from a byte-swap (reverses byte order)
;; followed by RBIT on the byte view (reverses bits within each byte).
385 (define_expand "ctz<mode>2"
386   [(set (match_operand:VS 0 "register_operand")
387         (ctz:VS (match_operand:VS 1 "register_operand")))]
388   "TARGET_SIMD"
389   {
390      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
391      rtx op0_castsi2qi = force_subreg (<VS:VSI2QI>mode, operands[0],
392                                        <MODE>mode, 0);
393      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
394      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
395      DONE;
396   }
;; xorsign (x, y): result has the magnitude of x and its sign bit XORed
;; with the sign bit of y.  Computed in the integer domain as
;; x ^ (y & sign-bit-mask).
399 (define_expand "@xorsign<mode>3"
400   [(match_operand:VHSDF 0 "register_operand")
401    (match_operand:VHSDF 1 "register_operand")
402    (match_operand:VHSDF 2 "register_operand")]
403   "TARGET_SIMD"
406   machine_mode imode = <V_INT_EQUIV>mode;
407   rtx v_bitmask = gen_reg_rtx (imode);
408   rtx op1x = gen_reg_rtx (imode);
409   rtx op2x = gen_reg_rtx (imode);
411   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
412   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
414   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
416   emit_move_insn (v_bitmask,
417                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
418                                                      HOST_WIDE_INT_M1U << bits));
420   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
421   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
422   emit_move_insn (operands[0],
423                   lowpart_subreg (<MODE>mode, op1x, imode));
424   DONE;
428 ;; The fcadd and fcmla patterns are deliberately made UNSPEC, due to the
429 ;; fact that their usage needs to guarantee that the source vectors are
430 ;; contiguous.  It would be wrong to describe the operation without being able
431 ;; to describe the permute that is also required, but even if that is done
432 ;; the permute would have been created as a LOAD_LANES which means the values
433 ;; in the registers are in the wrong order.
434 (define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
435   [(set (match_operand:VHSDF 0 "register_operand" "=w")
436         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
437                        (match_operand:VHSDF 2 "register_operand" "w")]
438                        FCADD))]
439   "TARGET_COMPLEX"
440   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
441   [(set_attr "type" "neon_fcadd")]
;; Optab entry point for complex add; little-endian only, since the lane
;; pairing assumed by FCADD differs on big-endian.
444 (define_expand "cadd<rot><mode>3"
445   [(set (match_operand:VHSDF 0 "register_operand")
446         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
447                        (match_operand:VHSDF 2 "register_operand")]
448                        FCADD))]
449   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
;; Complex multiply-accumulate; operand 1 is the accumulator (tied to the
;; destination).
452 (define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
453   [(set (match_operand:VHSDF 0 "register_operand" "=w")
454         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
455                                    (match_operand:VHSDF 3 "register_operand" "w")]
456                                    FCMLA)
457                     (match_operand:VHSDF 1 "register_operand" "0")))]
458   "TARGET_COMPLEX"
459   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
460   [(set_attr "type" "neon_fcmla")]
;; FCMLA with a lane (complex-element) selector; the lane index in operand 4
;; is remapped for endianness.
464 (define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
465   [(set (match_operand:VHSDF 0 "register_operand" "=w")
466         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
467                                    (match_operand:VHSDF 3 "register_operand" "w")
468                                    (match_operand:SI 4 "const_int_operand" "n")]
469                                    FCMLA)
470                     (match_operand:VHSDF 1 "register_operand" "0")))]
471   "TARGET_COMPLEX"
473   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
474   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
476   [(set_attr "type" "neon_fcmla")]
;; FCMLA of a 64-bit half-float vector with a lane from a 128-bit vector.
479 (define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
480   [(set (match_operand:V4HF 0 "register_operand" "=w")
481         (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
482                                  (match_operand:V8HF 3 "register_operand" "w")
483                                  (match_operand:SI 4 "const_int_operand" "n")]
484                                  FCMLA)
485                    (match_operand:V4HF 1 "register_operand" "0")))]
486   "TARGET_COMPLEX"
488   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
489   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
491   [(set_attr "type" "neon_fcmla")]
;; FCMLA of a 128-bit vector with a lane from a 64-bit vector; each complex
;; lane covers two elements, hence the nunits / 2 remapping.
494 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
495   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
496         (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
497                                      (match_operand:<VHALF> 3 "register_operand" "w")
498                                      (match_operand:SI 4 "const_int_operand" "n")]
499                                      FCMLA)
500                      (match_operand:VQ_HSF 1 "register_operand" "0")))]
501   "TARGET_COMPLEX"
503   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
504   operands[4]
505     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
506   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
508   [(set_attr "type" "neon_fcmla")]
511 ;; The complex mla/mls operations always need to expand to two instructions.
512 ;; The first operation does half the computation and the second does the
513 ;; remainder.  Because of this, expand early.
514 (define_expand "cml<fcmac1><conj_op><mode>4"
515   [(set (match_operand:VHSDF 0 "register_operand")
516         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
517                                    (match_operand:VHSDF 2 "register_operand")]
518                                    FCMLA_OP)
519                     (match_operand:VHSDF 3 "register_operand")))]
520   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
  /* Chain the two FCMLA halves through a temporary accumulator.  */
522   rtx tmp = gen_reg_rtx (<MODE>mode);
523   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
524                                                  operands[2], operands[1]));
525   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
526                                                  operands[2], operands[1]));
527   DONE;
530 ;; The complex mul operations always need to expand to two instructions.
531 ;; The first operation does half the computation and the second does the
532 ;; remainder.  Because of this, expand early.
533 (define_expand "cmul<conj_op><mode>3"
534   [(set (match_operand:VHSDF 0 "register_operand")
535         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
536                        (match_operand:VHSDF 2 "register_operand")]
537                        FCMUL_OP))]
538   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
  /* A plain multiply is an FCMLA accumulating onto a zero vector.  */
540   rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
541   rtx res1 = gen_reg_rtx (<MODE>mode);
542   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
543                                                  operands[2], operands[1]));
544   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
545                                                  operands[2], operands[1]));
546   DONE;
549 ;; These expands map to the Dot Product optab the vectorizer checks for
550 ;; and to the intrinsics pattern.
551 ;; The auto-vectorizer expects a dot product builtin that also does an
552 ;; accumulation into the provided register.
553 ;; Given the following pattern
555 ;; for (i=0; i<len; i++) {
556 ;;     c = a[i] * b[i];
557 ;;     r += c;
558 ;; }
559 ;; return result;
561 ;; This can be auto-vectorized to
562 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
564 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
565 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
566 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
567 ;; ...
569 ;; and so the vectorizer provides r, in which the result has to be accumulated.
570 (define_insn "<sur>dot_prod<mode><vsi2qi><vczle><vczbe>"
571   [(set (match_operand:VS 0 "register_operand" "=w")
572         (plus:VS
573           (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
574                       (match_operand:<VSI2QI> 2 "register_operand" "w")]
575                       DOTPROD)
576           (match_operand:VS 3 "register_operand" "0")))]
577   "TARGET_DOTPROD"
578   "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
579   [(set_attr "type" "neon_dot<q>")]
582 ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
583 ;; (vector) Dot Product operation and the vectorized optab.
584 (define_insn "usdot_prod<mode><vsi2qi><vczle><vczbe>"
585   [(set (match_operand:VS 0 "register_operand" "=w")
586         (plus:VS
587           (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
588                       (match_operand:<VSI2QI> 2 "register_operand" "w")]
589           UNSPEC_USDOT)
590           (match_operand:VS 3 "register_operand" "0")))]
591   "TARGET_I8MM"
592   "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
593   [(set_attr "type" "neon_dot<q>")]
596 ;; These instructions map to the __builtins for the Dot Product
597 ;; indexed operations.
;; Dot product against a single 32-bit (4-byte) lane of a 64-bit vector.
598 (define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
599   [(set (match_operand:VS 0 "register_operand" "=w")
600         (plus:VS
601           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
602                       (match_operand:V8QI 3 "register_operand" "<h_con>")
603                       (match_operand:SI 4 "immediate_operand" "i")]
604                       DOTPROD)
605           (match_operand:VS 1 "register_operand" "0")))]
606   "TARGET_DOTPROD"
607   {
608     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
609     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
610   }
611   [(set_attr "type" "neon_dot<q>")]
;; As above, but the lane comes from a 128-bit vector.
614 (define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
615   [(set (match_operand:VS 0 "register_operand" "=w")
616         (plus:VS
617           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
618                       (match_operand:V16QI 3 "register_operand" "<h_con>")
619                       (match_operand:SI 4 "immediate_operand" "i")]
620                       DOTPROD)
621           (match_operand:VS 1 "register_operand" "0")))]
622   "TARGET_DOTPROD"
623   {
624     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
625     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
626   }
627   [(set_attr "type" "neon_dot<q>")]
630 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
631 ;; (by element) Dot Product operations.
;; Each dot-product lane covers four bytes, hence the nunits / 4 remapping.
632 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
633   [(set (match_operand:VS 0 "register_operand" "=w")
634         (plus:VS
635           (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
636                       (match_operand:VB 3 "register_operand" "w")
637                       (match_operand:SI 4 "immediate_operand" "i")]
638           DOTPROD_I8MM)
639           (match_operand:VS 1 "register_operand" "0")))]
640   "TARGET_I8MM"
641   {
642     int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
643     int lane = INTVAL (operands[4]);
644     operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
645     return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
646   }
647   [(set_attr "type" "neon_dot<VS:q>")]
;; copysign (x, y): magnitude of x with the sign of y, implemented as a
;; bit-select (BSL) under a sign-bit mask.  copysign (x, negative-constant)
;; shortcuts to an ORR of the sign bit.
650 (define_expand "copysign<mode>3"
651   [(match_operand:VHSDF 0 "register_operand")
652    (match_operand:VHSDF 1 "register_operand")
653    (match_operand:VHSDF 2 "nonmemory_operand")]
654   "TARGET_SIMD"
656   machine_mode int_mode = <V_INT_EQUIV>mode;
657   rtx v_bitmask = gen_reg_rtx (int_mode);
658   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
660   emit_move_insn (v_bitmask,
661                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
662                                                      HOST_WIDE_INT_M1U << bits));
664   /* copysign (x, -1) should instead be expanded as orr with the sign
665      bit.  */
666   if (!REG_P (operands[2]))
667     {
668       rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
669       if (GET_CODE (op2_elt) == CONST_DOUBLE
670           && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
671         {
672           emit_insn (gen_ior<v_int_equiv>3 (
673             lowpart_subreg (int_mode, operands[0], <MODE>mode),
674             lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
675           DONE;
676         }
677     }
679   operands[2] = force_reg (<MODE>mode, operands[2]);
680   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
681                                          operands[2], operands[1]));
682   DONE;
;; Multiply a vector by a single lane of another vector (64-bit source for
;; the lane).  The lane index is remapped for endianness.
686 (define_insn "mul_lane<mode>3"
687  [(set (match_operand:VMULD 0 "register_operand" "=w")
688        (mult:VMULD
689          (vec_duplicate:VMULD
690            (vec_select:<VEL>
691              (match_operand:<VCOND> 2 "register_operand" "<h_con>")
692              (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
693          (match_operand:VMULD 1 "register_operand" "w")))]
694   "TARGET_SIMD"
695   {
696     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
697     return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
698   }
699   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; As above, but the lane is selected from a 128-bit (<VCONQ>) vector.
702 (define_insn "mul_laneq<mode>3"
703   [(set (match_operand:VMUL 0 "register_operand" "=w")
704      (mult:VMUL
705        (vec_duplicate:VMUL
706           (vec_select:<VEL>
707             (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
708             (parallel [(match_operand:SI 3 "immediate_operand")])))
709       (match_operand:VMUL 1 "register_operand" "w")))]
710   "TARGET_SIMD"
711   {
712     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
713     return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
714   }
715   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Multiply a vector by a duplicated scalar, using the element-0 indexed
;; form of MUL/FMUL.  (The trailing ";" after the template starts an empty
;; md comment and has no effect.)
718 (define_insn "mul_n<mode>3"
719  [(set (match_operand:VMUL 0 "register_operand" "=w")
720        (mult:VMUL
721          (vec_duplicate:VMUL
722            (match_operand:<VEL> 2 "register_operand" "<h_con>"))
723          (match_operand:VMUL 1 "register_operand" "w")))]
724   "TARGET_SIMD"
725   "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
726   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
;; Floating-point reciprocal square root estimate.
729 (define_insn "@aarch64_rsqrte<mode>"
730   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
731         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
732                      UNSPEC_RSQRTE))]
733   "TARGET_SIMD"
734   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
735   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Newton-Raphson step used to refine the rsqrte estimate.
737 (define_insn "@aarch64_rsqrts<mode>"
738   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
739         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
740                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
741          UNSPEC_RSQRTS))]
742   "TARGET_SIMD"
743   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
744   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
;; Expand 1/sqrt(x) via the iterative approximation sequence.
746 (define_expand "rsqrt<mode>2"
747   [(set (match_operand:VALLF 0 "register_operand")
748         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
749                      UNSPEC_RSQRT))]
750   "TARGET_SIMD"
752   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
753   DONE;
;; Unsigned integer reciprocal square root estimate.
756 (define_insn "aarch64_ursqrte<mode>"
757 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
758       (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
759                    UNSPEC_RSQRTE))]
760 "TARGET_SIMD"
761 "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
762 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
;; Combine pattern: DF-mode multiply where one operand is a lane extracted
;; from a V2DF register; emits the by-element FMUL directly.
;; NOTE(review): the output template writes %0.2d/%3.2d for a DF-mode
;; result/operand — presumably the operand modifiers narrow this to the
;; scalar form; confirm against the output-modifier handling.
764 (define_insn "*aarch64_mul3_elt_to_64v2df"
765   [(set (match_operand:DF 0 "register_operand" "=w")
766      (mult:DF
767        (vec_select:DF
768          (match_operand:V2DF 1 "register_operand" "w")
769          (parallel [(match_operand:SI 2 "immediate_operand")]))
770        (match_operand:DF 3 "register_operand" "w")))]
771   "TARGET_SIMD"
772   {
773     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
774     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
775   }
776   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Integer vector negate; <vczle><vczbe> add the variants that implicitly
;; zero the upper half of the full register (see the define_substs above).
779 (define_insn "neg<mode>2<vczle><vczbe>"
780   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
781         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
782   "TARGET_SIMD"
783   "neg\t%0.<Vtype>, %1.<Vtype>"
784   [(set_attr "type" "neon_neg<q>")]
;; Integer vector absolute value using the canonical RTL abs code.
787 (define_insn "abs<mode>2<vczle><vczbe>"
788   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
789         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
790   "TARGET_SIMD"
791   "abs\t%0.<Vtype>, %1.<Vtype>"
792   [(set_attr "type" "neon_abs<q>")]
795 ;; The intrinsic version of integer ABS must not be allowed to
796 ;; combine with any operation with an integrated ABS step, such
797 ;; as SABD.
;; Hence this variant hides the operation behind UNSPEC_ABS.
798 (define_insn "aarch64_abs<mode><vczle><vczbe>"
799   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
800           (unspec:VSDQ_I_DI
801             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
802            UNSPEC_ABS))]
803   "TARGET_SIMD"
804   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
805   [(set_attr "type" "neon_abs<q>")]
808 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
809 ;; This isn't accurate as ABS treats always its input as a signed value.
810 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
811 ;; Whereas SABD would return 192 (-64 signed) on the above example.
812 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
813 (define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
814   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
815         (minus:VDQ_BHSI
816           (USMAX:VDQ_BHSI
817             (match_operand:VDQ_BHSI 1 "register_operand" "w")
818             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
819           (<max_opp>:VDQ_BHSI
820             (match_dup 1)
821             (match_dup 2))))]
822   "TARGET_SIMD"
823   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
824   [(set_attr "type" "neon_abd<q>")]
;; Standard [su]abd optab entry point; simply forwards to the insn above.
827 (define_expand "<su>abd<mode>3"
828   [(match_operand:VDQ_BHSI 0 "register_operand")
829    (USMAX:VDQ_BHSI
830      (match_operand:VDQ_BHSI 1 "register_operand")
831      (match_operand:VDQ_BHSI 2 "register_operand"))]
832   "TARGET_SIMD"
833   {
834     emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));
835     DONE;
836   }
;; [SU]ABDL: widening absolute difference of 64-bit vectors, represented
;; with the same max/min MINUS idiom as aarch64_<su>abd, zero-extended to
;; the double-width mode (the difference itself is always non-negative).
839 (define_insn "aarch64_<su>abdl<mode>"
840   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
841         (zero_extend:<VWIDE>
842           (minus:VD_BHSI
843             (USMAX:VD_BHSI
844               (match_operand:VD_BHSI 1 "register_operand" "w")
845               (match_operand:VD_BHSI 2 "register_operand" "w"))
846             (<max_opp>:VD_BHSI
847               (match_dup 1)
848               (match_dup 2)))))]
849   "TARGET_SIMD"
850   "<su>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
851   [(set_attr "type" "neon_abd<q>")]
;; [SU]ABDL2: same operation on the high halves of 128-bit inputs;
;; operand 3 is the parallel selecting the high-half lanes.
854 (define_insn "aarch64_<su>abdl2<mode>_insn"
855   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
856         (zero_extend:<VDBLW>
857           (minus:<VHALF>
858             (USMAX:<VHALF>
859               (vec_select:<VHALF>
860                 (match_operand:VQW 1 "register_operand" "w")
861                 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))
862               (vec_select:<VHALF>
863                 (match_operand:VQW 2 "register_operand" "w")
864                 (match_dup 3)))
865             (<max_opp>:<VHALF>
866               (vec_select:<VHALF>
867                 (match_dup 1)
868                 (match_dup 3))
869               (vec_select:<VHALF>
870                 (match_dup 2)
871                 (match_dup 3))))))]
873   "TARGET_SIMD"
874   "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
875   [(set_attr "type" "neon_abd<q>")]
;; Builder for the insn above: materializes the high-half lane parallel.
878 (define_expand "aarch64_<su>abdl2<mode>"
879   [(match_operand:<VDBLW> 0 "register_operand")
880    (USMAX:VQW
881      (match_operand:VQW 1 "register_operand")
882      (match_operand:VQW 2 "register_operand"))]
883   "TARGET_SIMD"
884   {
885     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
886     emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[0], operands[1],
887                                                  operands[2], hi));
888     DONE;
889   }
;; Alternative ABS (MINUS (extend) (extend)) representation of the
;; widening absolute difference: valid here because the MINUS is computed
;; in the already-widened mode, so it cannot wrap.  High-half form.
892 (define_insn "aarch64_<su>abdl<mode>_hi_internal"
893   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
894         (abs:<VWIDE>
895           (minus:<VWIDE>
896             (ANY_EXTEND:<VWIDE>
897               (vec_select:<VHALF>
898                 (match_operand:VQW 1 "register_operand" "w")
899                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
900             (ANY_EXTEND:<VWIDE>
901               (vec_select:<VHALF>
902                 (match_operand:VQW 2 "register_operand" "w")
903                 (match_dup 3))))))]
904   "TARGET_SIMD"
905   "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
906   [(set_attr "type" "neon_abd_long")]
;; Low-half counterpart of the pattern above; selects the low lanes and
;; emits the non-"2" [SU]ABDL form.
909 (define_insn "aarch64_<su>abdl<mode>_lo_internal"
910   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
911         (abs:<VWIDE>
912           (minus:<VWIDE>
913             (ANY_EXTEND:<VWIDE>
914               (vec_select:<VHALF>
915                 (match_operand:VQW 1 "register_operand" "w")
916                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
917             (ANY_EXTEND:<VWIDE>
918               (vec_select:<VHALF>
919                 (match_operand:VQW 2 "register_operand" "w")
920                 (match_dup 3))))))]
921   "TARGET_SIMD"
922   "<su>abdl\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
923   [(set_attr "type" "neon_abd_long")]
;; Standard vec_widen_[su]abd_hi optab entry: high-half widening ABD.
926 (define_expand "vec_widen_<su>abd_hi_<mode>"
927   [(match_operand:<VWIDE> 0 "register_operand")
928    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
929    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
930   "TARGET_SIMD"
931   {
932     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
933     emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[0], operands[1],
934                                                        operands[2], p));
935     DONE;
936   }
;; Standard vec_widen_[su]abd_lo optab entry: low-half widening ABD.
939 (define_expand "vec_widen_<su>abd_lo_<mode>"
940   [(match_operand:<VWIDE> 0 "register_operand")
941    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
942    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
943   "TARGET_SIMD"
944   {
945     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
946     emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[0], operands[1],
947                                                        operands[2], p));
948     DONE;
949   }
;; [SU]ABAL: widening absolute difference accumulated into operand 1
;; (tied to the destination, constraint "0").
952 (define_insn "aarch64_<su>abal<mode>"
953   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
954         (plus:<VWIDE>
955           (zero_extend:<VWIDE>
956             (minus:VD_BHSI
957               (USMAX:VD_BHSI
958                 (match_operand:VD_BHSI 2 "register_operand" "w")
959                 (match_operand:VD_BHSI 3 "register_operand" "w"))
960               (<max_opp>:VD_BHSI
961                 (match_dup 2)
962                 (match_dup 3))))
963           (match_operand:<VWIDE> 1 "register_operand" "0")))]
964   "TARGET_SIMD"
965   "<su>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
966   [(set_attr "type" "neon_arith_acc<q>")]
;; [SU]ABAL2: accumulate the widening ABD of the high halves of 128-bit
;; inputs; operand 4 is the high-half lane parallel.
969 (define_insn "aarch64_<su>abal2<mode>_insn"
970   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
971         (plus:<VDBLW>
972           (zero_extend:<VDBLW>
973             (minus:<VHALF>
974               (USMAX:<VHALF>
975                 (vec_select:<VHALF>
976                   (match_operand:VQW 2 "register_operand" "w")
977                   (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
978                 (vec_select:<VHALF>
979                   (match_operand:VQW 3 "register_operand" "w")
980                   (match_dup 4)))
981               (<max_opp>:<VHALF>
982                 (vec_select:<VHALF>
983                   (match_dup 2)
984                   (match_dup 4))
985                 (vec_select:<VHALF>
986                   (match_dup 3)
987                   (match_dup 4)))))
988           (match_operand:<VDBLW> 1 "register_operand" "0")))]
989   "TARGET_SIMD"
990   "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
991   [(set_attr "type" "neon_arith_acc<q>")]
;; Builder for the insn above: materializes the high-half lane parallel.
994 (define_expand "aarch64_<su>abal2<mode>"
995   [(match_operand:<VDBLW> 0 "register_operand")
996    (match_operand:<VDBLW> 1 "register_operand")
997    (USMAX:VQW
998      (match_operand:VQW 2 "register_operand")
999      (match_operand:VQW 3 "register_operand"))]
1000   "TARGET_SIMD"
1001   {
1002     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1003     emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
1004                                                  operands[2], operands[3], hi));
1005     DONE;
1006   }
;; [SU]ADALP: add widened pairwise sums of operand 2 into operand 1.
;; Modeled as (even-lane extend) + (odd-lane extend) + accumulator; the
;; expander builds the even/odd stepped lane parallels (operands 3 and 4).
1009 (define_expand "aarch64_<su>adalp<mode>"
1010   [(set (match_operand:<VDBLW> 0 "register_operand")
1011         (plus:<VDBLW>
1012           (plus:<VDBLW>
1013             (vec_select:<VDBLW>
1014               (ANY_EXTEND:<V2XWIDE>
1015                 (match_operand:VDQV_L 2 "register_operand"))
1016               (match_dup 3))
1017             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1018               (match_dup 4)))
1019           (match_operand:<VDBLW> 1 "register_operand")))]
1020  "TARGET_SIMD"
1022    int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
1023    operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
1024    operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
;; Matcher for the expansion above; the condition requires the two lane
;; parallels to differ so that one selects even and the other odd lanes.
1028 (define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
1029   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
1030         (plus:<VDBLW>
1031           (plus:<VDBLW>
1032             (vec_select:<VDBLW>
1033               (ANY_EXTEND:<V2XWIDE>
1034                 (match_operand:VDQV_L 2 "register_operand" "w"))
1035               (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" ""))
1036             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1037               (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" "")))
1038         (match_operand:<VDBLW> 1 "register_operand" "0")))]
1039  "TARGET_SIMD
1040   && !rtx_equal_p (operands[3], operands[4])"
1041  "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>"
1042   [(set_attr "type" "neon_reduc_add<q>")]
1045 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
1046 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
1047 ;; reduction of the difference into a V4SI vector and accumulate that into
1048 ;; operand 3 before copying that into the result operand 0.
1049 ;; Perform that with a sequence of:
1050 ;; UABDL2       tmp.8h, op1.16b, op2.16b
1051 ;; UABAL        tmp.8h, op1.8b, op2.8b
1052 ;; UADALP       op3.4s, tmp.8h
1053 ;; MOV          op0, op3 // should be eliminated in later passes.
1055 ;; For TARGET_DOTPROD we do:
1056 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
1057 ;; UABD tmp2.16b, op1.16b, op2.16b
1058 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
1059 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
1061 ;; The signed version just uses the signed variants of the above instructions
1062 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
1063 ;; unsigned.
;; Expander implementing the two strategies described above.
1065 (define_expand "<su>sadv16qi"
1066   [(use (match_operand:V4SI 0 "register_operand"))
1067    (USMAX:V16QI (match_operand:V16QI 1 "register_operand")
1068                 (match_operand:V16QI 2 "register_operand"))
1069    (use (match_operand:V4SI 3 "register_operand"))]
1070   "TARGET_SIMD"
1071   {
1072     if (TARGET_DOTPROD)
1073       {
1074         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
1075         rtx abd = gen_reg_rtx (V16QImode);
1076         emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
1077         emit_insn (gen_udot_prodv4siv16qi (operands[0], abd, ones,
1078                                            operands[3]));
1079         DONE;
1080       }
1081     rtx reduc = gen_reg_rtx (V8HImode);
1082     emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[1],
1083                                             operands[2]));
1084     emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
1085                                          gen_lowpart (V8QImode, operands[1]),
1086                                          gen_lowpart (V8QImode,
1087                                                       operands[2])));
1088     emit_insn (gen_aarch64_<su>adalpv8hi (operands[3], operands[3], reduc));
1089     emit_move_insn (operands[0], operands[3]);
1090     DONE;
1091   }
;; [SU]ABA: non-widening absolute difference accumulated into operand 1
;; (tied to the destination); ABD uses the same max - min representation
;; as aarch64_<su>abd above.
1094 (define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
1095   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1096         (plus:VDQ_BHSI (minus:VDQ_BHSI
1097                          (USMAX:VDQ_BHSI
1098                            (match_operand:VDQ_BHSI 2 "register_operand" "w")
1099                            (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1100                          (<max_opp>:VDQ_BHSI
1101                            (match_dup 2)
1102                            (match_dup 3)))
1103                        (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1104   "TARGET_SIMD"
1105   "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1106   [(set_attr "type" "neon_arith_acc<q>")]
;; FABD: floating-point absolute difference.  Unlike the integer case,
;; abs (minus ...) is an accurate representation for FP values.
1109 (define_insn "fabd<mode>3<vczle><vczbe>"
1110   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
1111         (abs:VHSDF_HSDF
1112           (minus:VHSDF_HSDF
1113             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
1114             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
1115   "TARGET_SIMD"
1116   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
1117   [(set_attr "type" "neon_fp_abd_<stype><q>")]
1120 ;; For AND (vector, register) and BIC (vector, immediate)
1121 (define_insn "and<mode>3<vczle><vczbe>"
1122   [(set (match_operand:VDQ_I 0 "register_operand")
1123         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1124                    (match_operand:VDQ_I 2 "aarch64_reg_or_and_imm")))]
1125   "TARGET_SIMD"
1126   {@ [ cons: =0 , 1 , 2   ]
1127      [ w        , w , w   ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1128      [ w        , 0 , Db  ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
1129   }
1130   [(set_attr "type" "neon_logic<q>")]
1133 ;; For ORR (vector, register) and ORR (vector, immediate)
1134 (define_insn "ior<mode>3<vczle><vczbe>"
1135   [(set (match_operand:VDQ_I 0 "register_operand")
1136         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1137                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm")))]
1138   "TARGET_SIMD"
1139   {@ [ cons: =0 , 1 , 2  ]
1140      [ w        , w , w  ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1141      [ w        , 0 , Do ] << aarch64_output_simd_orr_imm (operands[2], <bitsize>);
1142   }
1143   [(set_attr "type" "neon_logic<q>")]
1146 ;; For EOR (vector, register) and SVE EOR (vector, immediate)
1147 (define_insn "xor<mode>3<vczle><vczbe>"
1148   [(set (match_operand:VDQ_I 0 "register_operand")
1149         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1150                    (match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))]
1151   "TARGET_SIMD"
1152   {@ [ cons: =0 , 1 , 2  ]
1153      [ w        , w , w  ] eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1154      [ w        , 0 , De ] << aarch64_output_simd_xor_imm (operands[2], <bitsize>);
1155   }
1156   [(set_attr "type" "neon_logic<q>")]
;; Bitwise NOT on the whole vector register.
1159 (define_insn "one_cmpl<mode>2<vczle><vczbe>"
1160   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1161         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
1162   "TARGET_SIMD"
1163   "not\t%0.<Vbtype>, %1.<Vbtype>"
1164   [(set_attr "type" "neon_logic<q>")]
;; Insert a single element into a vector.  Operand 2 arrives as a one-hot
;; vec_merge mask; it is decoded to a lane number (endianness-adjusted),
;; then re-encoded so %p2 prints the lane.  Alternatives cover inserting
;; from a SIMD register lane (INS), a GP register (INS) or memory (LD1).
1167 (define_insn "@aarch64_simd_vec_set<mode>"
1168   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1169         (vec_merge:VALL_F16
1170             (vec_duplicate:VALL_F16
1171                 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
1172             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
1173             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
1174   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1175   {
1176    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1177    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1178    switch (which_alternative)
1179      {
1180      case 0:
1181         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1182      case 1:
1183         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1184      case 2:
1185         return "ld1\\t{%0.<Vetype>}[%p2], %1";
1186      default:
1187         gcc_unreachable ();
1188      }
1189   }
1190   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
;; Zero a single lane of a vector by inserting the zero register,
;; matching a vec_merge with an immediate-zero first operand.
1193 (define_insn "aarch64_simd_vec_set_zero<mode>"
1194   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1195         (vec_merge:VALL_F16
1196             (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
1197             (match_operand:VALL_F16 3 "register_operand" "0")
1198             (match_operand:SI 2 "immediate_operand" "i")))]
1199   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1200   {
1201     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1202     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1203     return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
1204   }
;; Copy one lane of operand 3 into a lane of operand 0 (INS element form).
;; Operand 2 is the one-hot destination-lane mask, operand 4 the source
;; lane index; both are endianness-adjusted before output.
1207 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1208   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1209         (vec_merge:VALL_F16
1210             (vec_duplicate:VALL_F16
1211               (vec_select:<VEL>
1212                 (match_operand:VALL_F16 3 "register_operand" "w")
1213                 (parallel
1214                   [(match_operand:SI 4 "immediate_operand" "i")])))
1215             (match_operand:VALL_F16 1 "register_operand" "0")
1216             (match_operand:SI 2 "immediate_operand" "i")))]
1217   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1218   {
1219     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1220     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1221     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1223     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1224   }
1225   [(set_attr "type" "neon_ins<q>")]
;; As above but the source vector has a different (swapped) width from
;; the destination; the source lane is adjusted in <VSWAP_WIDTH>mode.
1228 (define_insn "@aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1229   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1230         (vec_merge:VALL_F16_NO_V2Q
1231             (vec_duplicate:VALL_F16_NO_V2Q
1232               (vec_select:<VEL>
1233                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1234                 (parallel
1235                   [(match_operand:SI 4 "immediate_operand" "i")])))
1236             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1237             (match_operand:SI 2 "immediate_operand" "i")))]
1238   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1239   {
1240     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1241     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1242     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1243                                            INTVAL (operands[4]));
1245     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1246   }
1247   [(set_attr "type" "neon_ins<q>")]
;; Extract the sign bit of each float element: view the vector as
;; integers and logically shift each element right by (bitsize - 1).
1250 (define_expand "signbit<mode>2"
1251   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1252    (use (match_operand:VDQSF 1 "register_operand"))]
1253   "TARGET_SIMD"
1255   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1256   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1257                                                         shift_amount);
1258   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1260   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1261                                                  shift_vector));
1262   DONE;
;; USHR: logical (zero-filling) right shift by vector of immediates.
1265 (define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
1266  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1267        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1268                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1269  "TARGET_SIMD"
1270  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1271   [(set_attr "type" "neon_shift_imm<q>")]
;; SSHR: arithmetic right shift by immediate.  A shift by every-bit
;; (constraint D1) is emitted as CMLT ...,#0, which gives the same result.
1274 (define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
1275  [(set (match_operand:VDQ_I 0 "register_operand")
1276        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1277                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm")))]
1278  "TARGET_SIMD"
1279  {@ [ cons: =0 , 1 , 2  ; attrs: type        ]
1280     [ w        , w , D1 ; neon_compare<q>    ] cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1281     [ w        , w , Dr ; neon_shift_imm<q>  ] sshr\t%0.<Vtype>, %1.<Vtype>, %2
1282   }
;; [SU]SRA: shift right (arithmetic or logical via SHIFTRT) by immediate
;; and accumulate into operand 1 (tied to the destination).
1285 (define_insn "aarch64_<sra_op>sra_n<mode>_insn"
1286  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1287         (plus:VDQ_I
1288            (SHIFTRT:VDQ_I
1289                 (match_operand:VDQ_I 2 "register_operand" "w")
1290                 (match_operand:VDQ_I 3 "aarch64_simd_rshift_imm"))
1291            (match_operand:VDQ_I 1 "register_operand" "0")))]
1292   "TARGET_SIMD"
1293   "<sra_op>sra\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1294   [(set_attr "type" "neon_shift_acc<q>")]
1297 ;; After all the combinations and propagations of ROTATE have been
1298 ;; attempted split any remaining vector rotates into SHL + USRA sequences.
1299 ;; Don't match this after reload as the various possible sequence for this
1300 ;; require temporary registers.
;; aarch64_emit_opt_vec_rotate is tried first and may emit a better
;; sequence; otherwise the split emits SHL into a temp followed by a
;; USRA-shaped (lshiftrt + plus) with the complementary shift amount.
1301 (define_insn_and_split "*aarch64_simd_rotate_imm<mode>"
1302   [(set (match_operand:VDQ_I 0 "register_operand" "=&w")
1303         (rotate:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1304                       (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm")))]
1305   "TARGET_SIMD && can_create_pseudo_p ()"
1306   "#"
1307   "&& 1"
1308   [(set (match_dup 3)
1309         (ashift:VDQ_I (match_dup 1)
1310                       (match_dup 2)))
1311    (set (match_dup 0)
1312         (plus:VDQ_I
1313           (lshiftrt:VDQ_I
1314             (match_dup 1)
1315             (match_dup 4))
1316           (match_dup 3)))]
1317   {
1318     if (aarch64_emit_opt_vec_rotate (operands[0], operands[1], operands[2]))
1319       DONE;
1321     operands[3] = gen_reg_rtx (<MODE>mode);
1322     rtx shft_amnt = unwrap_const_vec_duplicate (operands[2]);
1323     int bitwidth = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1324     operands[4]
1325       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1326                                            bitwidth - INTVAL (shft_amnt));
1327   }
1328   [(set_attr "length" "8")]
;; [SU]RSRA: rounding shift-right-accumulate.  Rounding is modeled
;; explicitly in double-width mode: extend, add the rounding constant
;; (operand 4, validated as 1 << (shift-1)), shift, truncate, accumulate.
1331 (define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
1332  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
1333         (plus:VSDQ_I_DI
1334           (truncate:VSDQ_I_DI
1335             (SHIFTRT:<V2XWIDE>
1336               (plus:<V2XWIDE>
1337                 (<SHIFTEXTEND>:<V2XWIDE>
1338                   (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
1339                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
1340               (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
1341           (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
1342   "TARGET_SIMD
1343    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
1344   "<sra_op>rsra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1345   [(set_attr "type" "neon_shift_acc<q>")]
;; Builder for the SRA insn: the scalar shift amount in operand 3 is
;; duplicated into a constant vector before matching the insn pattern.
1348 (define_expand "aarch64_<sra_op>sra_n<mode>"
1349  [(set (match_operand:VDQ_I 0 "register_operand")
1350         (plus:VDQ_I
1351            (SHIFTRT:VDQ_I
1352                 (match_operand:VDQ_I 2 "register_operand")
1353                 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
1354            (match_operand:VDQ_I 1 "register_operand")))]
1355   "TARGET_SIMD"
1356   {
1357     operands[3]
1358       = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[3]));
1359   }
;; Builder for the RSRA insn above: computes the rounding constant and
;; the shift operand in the shapes the insn pattern expects.
1362 (define_expand "aarch64_<sra_op>rsra_n<mode>"
1363   [(match_operand:VSDQ_I_DI 0 "register_operand")
1364    (match_operand:VSDQ_I_DI 1 "register_operand")
1365    (SHIFTRT:VSDQ_I_DI
1366      (match_operand:VSDQ_I_DI 2 "register_operand")
1367      (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
1368   "TARGET_SIMD"
1369   {
1370     /* Use this expander to create the rounding constant vector, which is
1371        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
1372        RTL is generated when handling the DImode expanders.  */
1373     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
1374     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
1375     rtx shft = gen_int_mode (INTVAL (operands[3]), DImode);
1376     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
1377     if (VECTOR_MODE_P (<MODE>mode))
1378       {
1379         shft = gen_const_vec_duplicate (<MODE>mode, shft);
1380         rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
1381       }
1383     emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[0], operands[1],
1384                                                       operands[2], shft, rnd));
1385     DONE;
1386   }
;; Left shift by vector of immediates.  A shift by 1 (constraint vs1)
;; is emitted as ADD of the operand with itself; otherwise SHL.
1389 (define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
1390  [(set (match_operand:VDQ_I 0 "register_operand")
1391        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1392                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm")))]
1393  "TARGET_SIMD"
1394   {@ [ cons: =0, 1,  2   ; attrs: type       ]
1395      [ w       , w,  vs1 ; neon_add<q>       ] add\t%0.<Vtype>, %1.<Vtype>, %1.<Vtype>
1396      [ w       , w,  Dl  ; neon_shift_imm<q> ] shl\t%0.<Vtype>, %1.<Vtype>, %2
1397   }
;; SSHL with a register shift-amount operand (canonical ashift RTL).
1400 (define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
1401  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1402        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1403                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1404  "TARGET_SIMD"
1405  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1406   [(set_attr "type" "neon_shift_reg<q>")]
;; USHL by register: kept as an unspec because the instruction shifts
;; left or right depending on the sign of each shift-amount element.
1409 (define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
1410  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1411        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1412                     (match_operand:VDQ_I 2 "register_operand" "w")]
1413                    UNSPEC_ASHIFT_UNSIGNED))]
1414  "TARGET_SIMD"
1415  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1416   [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL by register, signed unspec variant (see note above).
1419 (define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
1420  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1421        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1422                     (match_operand:VDQ_I 2 "register_operand" "w")]
1423                    UNSPEC_ASHIFT_SIGNED))]
1424  "TARGET_SIMD"
1425  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1426   [(set_attr "type" "neon_shift_reg<q>")]
;; Standard ashl optab: in-range constant amounts use the immediate-shift
;; insn; anything else is duplicated into a vector and emitted as SSHL.
1429 (define_expand "ashl<mode>3"
1430   [(match_operand:VDQ_I 0 "register_operand")
1431    (match_operand:VDQ_I 1 "register_operand")
1432    (match_operand:SI  2 "general_operand")]
1433  "TARGET_SIMD"
1435   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1436   int shift_amount;
1438   if (CONST_INT_P (operands[2]))
1439     {
1440       shift_amount = INTVAL (operands[2]);
1441       if (shift_amount >= 0 && shift_amount < bit_width)
1442         {
1443           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1444                                                        shift_amount);
1445           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1446                                                      operands[1],
1447                                                      tmp));
1448           DONE;
1449         }
1450     }
1452   operands[2] = force_reg (SImode, operands[2]);
1454   rtx tmp = gen_reg_rtx (<MODE>mode);
1455   emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1456                                                                operands[2],
1457                                                                0)));
1458   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1459   DONE;
;; Standard lshr optab: in-range constants use USHR directly; variable
;; amounts are negated and duplicated, since USHL shifts right when the
;; per-element amount is negative.
1462 (define_expand "lshr<mode>3"
1463   [(match_operand:VDQ_I 0 "register_operand")
1464    (match_operand:VDQ_I 1 "register_operand")
1465    (match_operand:SI  2 "general_operand")]
1466  "TARGET_SIMD"
1468   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1469   int shift_amount;
1471   if (CONST_INT_P (operands[2]))
1472     {
1473       shift_amount = INTVAL (operands[2]);
1474       if (shift_amount > 0 && shift_amount <= bit_width)
1475         {
1476           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1477                                                        shift_amount);
1478           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1479                                                   operands[1],
1480                                                   tmp));
1481           DONE;
1482         }
1483     }
1485   operands[2] = force_reg (SImode, operands[2]);
1487   rtx tmp = gen_reg_rtx (SImode);
1488   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1489   emit_insn (gen_negsi2 (tmp, operands[2]));
1490   emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1491                                          convert_to_mode (<VEL>mode, tmp, 0)));
1492   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1493                                                       tmp1));
1494   DONE;
;; Standard ashr optab: in-range constants use SSHR directly; variable
;; amounts are negated and duplicated for SSHL (negative amount shifts
;; right), mirroring the lshr expander above.
1497 (define_expand "ashr<mode>3"
1498   [(match_operand:VDQ_I 0 "register_operand")
1499    (match_operand:VDQ_I 1 "register_operand")
1500    (match_operand:SI  2 "general_operand")]
1501  "TARGET_SIMD"
1503   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1504   int shift_amount;
1506   if (CONST_INT_P (operands[2]))
1507     {
1508       shift_amount = INTVAL (operands[2]);
1509       if (shift_amount > 0 && shift_amount <= bit_width)
1510         {
1511           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1512                                                        shift_amount);
1513           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1514                                                   operands[1],
1515                                                   tmp));
1516           DONE;
1517         }
1518     }
1520   operands[2] = force_reg (SImode, operands[2]);
1522   rtx tmp = gen_reg_rtx (SImode);
1523   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1524   emit_insn (gen_negsi2 (tmp, operands[2]));
1525   emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1526                                                                 tmp, 0)));
1527   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1528                                                     tmp1));
1529   DONE;
;; Vector-by-vector shift left: maps directly onto the SSHL register form.
1532 (define_expand "vashl<mode>3"
1533  [(match_operand:VDQ_I 0 "register_operand")
1534   (match_operand:VDQ_I 1 "register_operand")
1535   (match_operand:VDQ_I 2 "register_operand")]
1536  "TARGET_SIMD"
1538   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1539                                               operands[2]));
1540   DONE;
;; Vector-by-vector arithmetic shift right: negate the per-element shift
;; counts, then use the signed register shift-left pattern (SSHL shifts
;; right when the count is negative).
1543 (define_expand "vashr<mode>3"
1544  [(match_operand:VDQ_I 0 "register_operand")
1545   (match_operand:VDQ_I 1 "register_operand")
1546   (match_operand:VDQ_I 2 "register_operand")]
1547  "TARGET_SIMD"
1549   rtx neg = gen_reg_rtx (<MODE>mode);
1550   emit (gen_neg<mode>2 (neg, operands[2]));
1551   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1552                                                     neg));
1553   DONE;
1556 ;; DI vector shift
1557 (define_expand "aarch64_ashr_simddi"
1558   [(match_operand:DI 0 "register_operand")
1559    (match_operand:DI 1 "register_operand")
1560    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1561   "TARGET_SIMD"
1562   {
1563     /* An arithmetic shift right by 64 fills the result with copies of the sign
1564        bit, just like asr by 63 - however the standard pattern does not handle
1565        a shift by 64.  */
1566     if (INTVAL (operands[2]) == 64)
1567       operands[2] = GEN_INT (63);
1568     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1569     DONE;
1570   }
;; Vector-by-vector logical shift right: negate the per-element counts and
;; use the unsigned register shift-left pattern (USHL shifts right for
;; negative counts).
1573 (define_expand "vlshr<mode>3"
1574  [(match_operand:VDQ_I 0 "register_operand")
1575   (match_operand:VDQ_I 1 "register_operand")
1576   (match_operand:VDQ_I 2 "register_operand")]
1577  "TARGET_SIMD"
1579   rtx neg = gen_reg_rtx (<MODE>mode);
1580   emit (gen_neg<mode>2 (neg, operands[2]));
1581   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1582                                                       neg));
1583   DONE;
;; Scalar DI logical shift right for the intrinsics: a shift by 64 yields
;; zero (the standard lshrdi3 pattern cannot express a full-width shift).
1586 (define_expand "aarch64_lshr_simddi"
1587   [(match_operand:DI 0 "register_operand")
1588    (match_operand:DI 1 "register_operand")
1589    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1590   "TARGET_SIMD"
1591   {
1592     if (INTVAL (operands[2]) == 64)
1593       emit_move_insn (operands[0], const0_rtx);
1594     else
1595       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1596     DONE;
1597   }
1600 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift right (element extraction helper).  On big-endian the
;; lane order in the register is reversed, so the same operation is a SHL
;; of the 64-bit scalar view rather than a USHR.
1601 (define_insn "vec_shr_<mode><vczle><vczbe>"
1602   [(set (match_operand:VD 0 "register_operand" "=w")
1603         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1604                     (match_operand:SI 2 "immediate_operand" "i")]
1605                    UNSPEC_VEC_SHR))]
1606   "TARGET_SIMD"
1607   {
1608     if (BYTES_BIG_ENDIAN)
1609       return "shl %d0, %d1, %2";
1610     else
1611       return "ushr %d0, %d1, %2";
1612   }
1613   [(set_attr "type" "neon_shift_imm")]
;; Set one element of a vector.  The internal vec_set insn takes the lane
;; selection as a one-hot bitmask, so convert the lane index here.
1616 (define_expand "vec_set<mode>"
1617   [(match_operand:VALL_F16 0 "register_operand")
1618    (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
1619    (match_operand:SI 2 "immediate_operand")]
1620   "TARGET_SIMD"
1621   {
1622     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1623     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1624                                           GEN_INT (elem), operands[0]));
1625     DONE;
1626   }
;; Multiply-accumulate: operand 0 = operand 1 + operand 2 * operand 3.
;; Operand 1 is tied to the destination ("0" constraint), matching MLA's
;; read-modify-write accumulator.
1630 (define_insn "aarch64_mla<mode><vczle><vczbe>"
1631  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1632        (plus:VDQ_BHSI (mult:VDQ_BHSI
1633                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1634                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1635                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1636  "TARGET_SIMD"
1637  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1638   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA where one multiplicand is a single lane of a same-width vector.
;; The lane number is remapped for endianness at output time.
1641 (define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
1642  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1643        (plus:VDQHS
1644          (mult:VDQHS
1645            (vec_duplicate:VDQHS
1646               (vec_select:<VEL>
1647                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1648                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1649            (match_operand:VDQHS 3 "register_operand" "w"))
1650          (match_operand:VDQHS 4 "register_operand" "0")))]
1651  "TARGET_SIMD"
1652   {
1653     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1654     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1655   }
1656   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane comes from the opposite-width vector mode
;; (e.g. a V4HI lane feeding a V8HI operation).
1659 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
1660  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1661        (plus:VDQHS
1662          (mult:VDQHS
1663            (vec_duplicate:VDQHS
1664               (vec_select:<VEL>
1665                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1666                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1667            (match_operand:VDQHS 3 "register_operand" "w"))
1668          (match_operand:VDQHS 4 "register_operand" "0")))]
1669  "TARGET_SIMD"
1670   {
1671     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1672     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1673   }
1674   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA by a duplicated scalar: always lane 0 of the scalar register.
1677 (define_insn "aarch64_mla_n<mode><vczle><vczbe>"
1678  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1679         (plus:VDQHS
1680           (mult:VDQHS
1681             (vec_duplicate:VDQHS
1682               (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1683             (match_operand:VDQHS 2 "register_operand" "w"))
1684           (match_operand:VDQHS 1 "register_operand" "0")))]
1685  "TARGET_SIMD"
1686  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1687   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract: operand 0 = operand 1 - operand 2 * operand 3.
;; Mirrors the MLA patterns above with minus instead of plus.
1690 (define_insn "aarch64_mls<mode><vczle><vczbe>"
1691  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1692        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1693                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1694                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1695  "TARGET_SIMD"
1696  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1697   [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS with one multiplicand taken from a lane of a same-width vector.
1700 (define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
1701  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1702        (minus:VDQHS
1703          (match_operand:VDQHS 4 "register_operand" "0")
1704          (mult:VDQHS
1705            (vec_duplicate:VDQHS
1706               (vec_select:<VEL>
1707                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1708                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1709            (match_operand:VDQHS 3 "register_operand" "w"))))]
1710  "TARGET_SIMD"
1711   {
1712     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1713     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1714   }
1715   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; As above, but the lane comes from the opposite-width vector mode.
1718 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
1719  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1720        (minus:VDQHS
1721          (match_operand:VDQHS 4 "register_operand" "0")
1722          (mult:VDQHS
1723            (vec_duplicate:VDQHS
1724               (vec_select:<VEL>
1725                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1726                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1727            (match_operand:VDQHS 3 "register_operand" "w"))))]
1728  "TARGET_SIMD"
1729   {
1730     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1731     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1732   }
1733   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by a duplicated scalar: always lane 0 of the scalar register.
1736 (define_insn "aarch64_mls_n<mode><vczle><vczbe>"
1737   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1738         (minus:VDQHS
1739           (match_operand:VDQHS 1 "register_operand" "0")
1740           (mult:VDQHS
1741             (vec_duplicate:VDQHS
1742               (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1743             (match_operand:VDQHS 2 "register_operand" "w"))))]
1744   "TARGET_SIMD"
1745   "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1746   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1749 ;; Max/Min operations.
;; Element-wise signed/unsigned max/min for B/H/S element sizes, which have
;; direct SMAX/SMIN/UMAX/UMIN instructions.
1750 (define_insn "<su><maxmin><mode>3<vczle><vczbe>"
1751  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1752        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1753                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1754  "TARGET_SIMD"
1755  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1756   [(set_attr "type" "neon_minmax<q>")]
;; V2DI has no direct max/min instruction, so synthesize it as a compare
;; followed by a mask-based select between the two inputs.
1759 (define_expand "<su><maxmin>v2di3"
1760  [(set (match_operand:V2DI 0 "register_operand")
1761        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1762                     (match_operand:V2DI 2 "register_operand")))]
1763  "TARGET_SIMD"
1765   enum rtx_code cmp_operator;
1766   rtx cmp_fmt;
;; Pick the comparison that selects operand 1 when it wins.
1768   switch (<CODE>)
1769     {
1770     case UMIN:
1771       cmp_operator = LTU;
1772       break;
1773     case SMIN:
1774       cmp_operator = LT;
1775       break;
1776     case UMAX:
1777       cmp_operator = GTU;
1778       break;
1779     case SMAX:
1780       cmp_operator = GT;
1781       break;
1782     default:
1783       gcc_unreachable ();
1784     }
1786   rtx mask = gen_reg_rtx (V2DImode);
1787   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1788   emit_insn (gen_vec_cmpv2div2di (mask, cmp_fmt, operands[1], operands[2]));
1789   emit_insn (gen_vcond_mask_v2div2di (operands[0], operands[1],
1790                                       operands[2], mask));
1791   DONE;
1794 ;; Pairwise Integer Max/Min operations.
1795 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1796  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1797        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1798                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1799                         MAXMINV))]
1800  "TARGET_SIMD"
1801  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1802   [(set_attr "type" "neon_minmax<q>")]
1805 ;; Pairwise FP Max/Min operations.
1806 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1807  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1808        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1809                       (match_operand:VHSDF 2 "register_operand" "w")]
1810                       FMAXMINV))]
1811  "TARGET_SIMD"
1812  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1813   [(set_attr "type" "neon_minmax<q>")]
1816 ;; vec_concat gives a new vector with the low elements from operand 1, and
1817 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1818 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1819 ;; What that means, is that the RTL descriptions of the below patterns
1820 ;; need to change depending on endianness.
1822 ;; Narrowing operations.
;; XTN2: narrow the quad-width operand 2 into the high half of the result,
;; keeping operand 1 (tied to the destination) as the low half.
1824 (define_insn "aarch64_xtn2<mode>_insn_le"
1825   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1826         (vec_concat:<VNARROWQ2>
1827           (match_operand:<VNARROWQ> 1 "register_operand" "0")
1828           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1829   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1830   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1831   [(set_attr "type" "neon_move_narrow_q")]
;; Big-endian variant: vec_concat operand order is reversed.
1834 (define_insn "aarch64_xtn2<mode>_insn_be"
1835   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1836         (vec_concat:<VNARROWQ2>
1837           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1838           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1839   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1840   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1841   [(set_attr "type" "neon_move_narrow_q")]
;; Dispatch to the endianness-specific insn above.
1844 (define_expand "aarch64_xtn2<mode>"
1845   [(match_operand:<VNARROWQ2> 0 "register_operand")
1846    (match_operand:<VNARROWQ> 1 "register_operand")
1847    (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1848   "TARGET_SIMD"
1849   {
1850     if (BYTES_BIG_ENDIAN)
1851       emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1852                                                  operands[2]));
1853     else
1854       emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1855                                                  operands[2]));
1856     DONE;
1857   }
;; Truncate-and-concatenate of two quad vectors, implemented as UZP1 (take
;; the even — i.e. low — half of each element pair).  Operand order in the
;; instruction is swapped on big-endian.
1860 (define_insn "*aarch64_narrow_trunc<mode>"
1861   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1862         (vec_concat:<VNARROWQ2>
1863           (truncate:<VNARROWQ>
1864             (match_operand:VQN 1 "register_operand" "w"))
1865           (truncate:<VNARROWQ>
1866             (match_operand:VQN 2 "register_operand" "w"))))]
1867   "TARGET_SIMD"
1869   if (!BYTES_BIG_ENDIAN)
1870     return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1871   else
1872     return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1874   [(set_attr "type" "neon_permute<q>")]
;; Truncation of a concatenation of two half-width vectors — also a UZP1,
;; producing a single narrowed result.
1877 (define_insn "*aarch64_trunc_concat<mode>"
1878   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1879         (truncate:<VNARROWQ>
1880           (vec_concat:VQN
1881             (match_operand:<VHALF> 1 "register_operand" "w")
1882             (match_operand:<VHALF> 2 "register_operand" "w"))))]
1883   "TARGET_SIMD"
1885   if (!BYTES_BIG_ENDIAN)
1886     return "uzp1\\t%0.<Vntype>, %1.<Vntype>, %2.<Vntype>";
1887   else
1888     return "uzp1\\t%0.<Vntype>, %2.<Vntype>, %1.<Vntype>";
1890   [(set_attr "type" "neon_permute<q>")]
1893 ;; Packing doubles.
;; Pack two D-register vectors: concatenate into a Q register, then
;; truncate the whole thing to the narrow double-width result.
1895 (define_expand "vec_pack_trunc_<mode>"
1896  [(match_operand:<VNARROWD> 0 "register_operand")
1897   (match_operand:VDN 1 "general_operand")
1898   (match_operand:VDN 2 "general_operand")]
1899  "TARGET_SIMD"
1901   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1902   emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
1903   emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
1904   DONE;
1907 ;; Packing quads.
;; Pack two Q-register vectors: XTN the logically-low input into a temp,
;; then XTN2 the other into the high half.  Which operand is "low" depends
;; on endianness.
1909 (define_expand "vec_pack_trunc_<mode>"
1910  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1911        (vec_concat:<VNARROWQ2>
1912          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1913          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1914  "TARGET_SIMD"
1916    rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1917    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1918    int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1920    emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1922    if (BYTES_BIG_ENDIAN)
1923      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1924                                                 operands[hi]));
1925    else
1926      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
1927                                                 operands[hi]));
1928    DONE;
;; Extract the top bits of each element of two vectors (shift right by
;; exactly half the element width, then narrow) — this is precisely UZP2
;; (take the odd, i.e. high, half of each element pair).
1932 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1933   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1934         (vec_concat:<VNARROWQ2>
1935           (truncate:<VNARROWQ>
1936             (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1937               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1938           (truncate:<VNARROWQ>
1939             (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1940               (match_dup 2)))))]
1941   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1942   "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1943   [(set_attr "type" "neon_permute<q>")]
;; Big-endian variant: the vec_concat halves are swapped in the RTL but the
;; emitted UZP2 is the same.
1946 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1947   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1948         (vec_concat:<VNARROWQ2>
1949           (truncate:<VNARROWQ>
1950             (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1951               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1952           (truncate:<VNARROWQ>
1953             (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1954               (match_dup 2)))))]
1955   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1956   "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1957   [(set_attr "type" "neon_permute<q>")]
1960 ;; Widening operations.
;; Widen the high half of a quad vector (SXTL2/UXTL2).  May later be split
;; into ZIP2-with-zero for zero-extension; see the split body below.
1962 (define_insn_and_split "aarch64_simd_vec_unpack<su>_hi_<mode>"
1963   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1964         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1965                                (match_operand:VQW 1 "register_operand" "w")
1966                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1967                             )))]
1968   "TARGET_SIMD"
1969   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1970   "&& <CODE> == ZERO_EXTEND
1971    && aarch64_split_simd_shift_p (insn)"
1972   [(const_int 0)]
1973   {
1974     /* On many cores, it is cheaper to implement UXTL2 using a ZIP2 with zero,
1975        provided that the cost of the zero can be amortized over several
1976        operations.  We'll later recombine the zero and zip if there are
1977        not sufficient uses of the zero to make the split worthwhile.  */
1978     rtx res = simplify_gen_subreg (<MODE>mode, operands[0], <VWIDE>mode, 0);
1979     rtx zero = aarch64_gen_shareable_zero (<MODE>mode);
1980     emit_insn (gen_aarch64_zip2<mode> (res, operands[1], zero));
1981     DONE;
1982   }
1983   [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expander: build the hi-half lane selector and defer to the
;; insn above.
1986 (define_expand "vec_unpack<su>_hi_<mode>"
1987   [(match_operand:<VWIDE> 0 "register_operand")
1988    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1989   "TARGET_SIMD"
1990   {
1991     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1992     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1993                                                           operands[1], p));
1994     DONE;
1995   }
;; Lo-half unpack: just reinterpret the low half via a subreg and let the
;; generic extend pattern match.
1998 (define_expand "vec_unpack<su>_lo_<mode>"
1999   [(set (match_operand:<VWIDE> 0 "register_operand")
2000         (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand")))]
2001   "TARGET_SIMD"
2002   {
2003     operands[1] = lowpart_subreg (<VHALF>mode, operands[1], <MODE>mode);
2004   }
2007 ;; Widening arithmetic.
;; Widening multiply-accumulate of the low halves (SMLAL/UMLAL).
2009 (define_insn "*aarch64_<su>mlal_lo<mode>"
2010   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2011         (plus:<VWIDE>
2012           (mult:<VWIDE>
2013               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2014                  (match_operand:VQW 2 "register_operand" "w")
2015                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2016               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2017                  (match_operand:VQW 4 "register_operand" "w")
2018                  (match_dup 3))))
2019           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2020   "TARGET_SIMD"
2021   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2022   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-accumulate of the high halves (SMLAL2/UMLAL2).
2025 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2026   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2027         (plus:<VWIDE>
2028           (mult:<VWIDE>
2029               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2030                  (match_operand:VQW 2 "register_operand" "w")
2031                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2032               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2033                  (match_operand:VQW 4 "register_operand" "w")
2034                  (match_dup 3))))
2035           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2036   "TARGET_SIMD"
2037   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2038   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Intrinsic expander for the hi-half MLAL: supplies the lane selector.
2041 (define_expand "aarch64_<su>mlal_hi<mode>"
2042   [(match_operand:<VWIDE> 0 "register_operand")
2043    (match_operand:<VWIDE> 1 "register_operand")
2044    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2045    (match_operand:VQW 3 "register_operand")]
2046   "TARGET_SIMD"
2048   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2049   emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2050                                                  operands[2], p, operands[3]));
2051   DONE;
;; Hi-half MLAL with the second multiplicand duplicated from a scalar
;; (SMLAL2/UMLAL2 by element, lane 0).
2055 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2056   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2057         (plus:<VWIDE>
2058           (mult:<VWIDE>
2059             (ANY_EXTEND:<VWIDE>
2060               (vec_select:<VHALF>
2061                 (match_operand:VQ_HSI 2 "register_operand" "w")
2062                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2063             (vec_duplicate:<VWIDE>
2064               (ANY_EXTEND:<VWIDE_S>
2065                 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2066           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2067   "TARGET_SIMD"
2068   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2069   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Intrinsic expander for the hi-half MLAL-by-scalar.
2072 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2073   [(match_operand:<VWIDE> 0 "register_operand")
2074    (match_operand:<VWIDE> 1 "register_operand")
2075    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2076    (match_operand:<VEL> 3 "register_operand")]
2077   "TARGET_SIMD"
2079   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2080   emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2081              operands[1], operands[2], p, operands[3]));
2082   DONE;
;; Widening multiply-subtract of the low halves (SMLSL/UMLSL).
2086 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2087   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2088         (minus:<VWIDE>
2089           (match_operand:<VWIDE> 1 "register_operand" "0")
2090           (mult:<VWIDE>
2091               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2092                  (match_operand:VQW 2 "register_operand" "w")
2093                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2094               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2095                  (match_operand:VQW 4 "register_operand" "w")
2096                  (match_dup 3))))))]
2097   "TARGET_SIMD"
2098   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2099   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply-subtract of the high halves (SMLSL2/UMLSL2).
2102 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2103   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2104         (minus:<VWIDE>
2105           (match_operand:<VWIDE> 1 "register_operand" "0")
2106           (mult:<VWIDE>
2107               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2108                  (match_operand:VQW 2 "register_operand" "w")
2109                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2110               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2111                  (match_operand:VQW 4 "register_operand" "w")
2112                  (match_dup 3))))))]
2113   "TARGET_SIMD"
2114   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2115   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Intrinsic expander for the hi-half MLSL: supplies the lane selector.
2118 (define_expand "aarch64_<su>mlsl_hi<mode>"
2119   [(match_operand:<VWIDE> 0 "register_operand")
2120    (match_operand:<VWIDE> 1 "register_operand")
2121    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2122    (match_operand:VQW 3 "register_operand")]
2123   "TARGET_SIMD"
2125   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2126   emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2127                                                  operands[2], p, operands[3]));
2128   DONE;
;; Hi-half MLSL with the second multiplicand duplicated from a scalar
;; (SMLSL2/UMLSL2 by element, lane 0).
2132 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2133   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2134         (minus:<VWIDE>
2135           (match_operand:<VWIDE> 1 "register_operand" "0")
2136           (mult:<VWIDE>
2137             (ANY_EXTEND:<VWIDE>
2138               (vec_select:<VHALF>
2139                 (match_operand:VQ_HSI 2 "register_operand" "w")
2140                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2141             (vec_duplicate:<VWIDE>
2142               (ANY_EXTEND:<VWIDE_S>
2143                 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2144   "TARGET_SIMD"
2145   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2146   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Intrinsic expander for the hi-half MLSL-by-scalar.
2149 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2150   [(match_operand:<VWIDE> 0 "register_operand")
2151    (match_operand:<VWIDE> 1 "register_operand")
2152    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2153    (match_operand:<VEL> 3 "register_operand")]
2154   "TARGET_SIMD"
2156   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2157   emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2158              operands[1], operands[2], p, operands[3]));
2159   DONE;
;; D-register (64-bit) widening multiply-accumulate: whole-vector operands,
;; no hi/lo half selection needed.
2163 (define_insn "aarch64_<su>mlal<mode>"
2164   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2165         (plus:<VWIDE>
2166           (mult:<VWIDE>
2167             (ANY_EXTEND:<VWIDE>
2168               (match_operand:VD_BHSI 2 "register_operand" "w"))
2169             (ANY_EXTEND:<VWIDE>
2170               (match_operand:VD_BHSI 3 "register_operand" "w")))
2171           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2172   "TARGET_SIMD"
2173   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2174   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register widening MLA by a duplicated scalar (lane 0).
2177 (define_insn "aarch64_<su>mlal_n<mode>"
2178   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2179         (plus:<VWIDE>
2180           (mult:<VWIDE>
2181             (ANY_EXTEND:<VWIDE>
2182               (match_operand:VD_HSI 2 "register_operand" "w"))
2183             (vec_duplicate:<VWIDE>
2184               (ANY_EXTEND:<VWIDE_S>
2185                 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2186           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2187   "TARGET_SIMD"
2188   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2189   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register widening multiply-subtract.
2192 (define_insn "aarch64_<su>mlsl<mode>"
2193   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2194         (minus:<VWIDE>
2195           (match_operand:<VWIDE> 1 "register_operand" "0")
2196           (mult:<VWIDE>
2197             (ANY_EXTEND:<VWIDE>
2198               (match_operand:VD_BHSI 2 "register_operand" "w"))
2199             (ANY_EXTEND:<VWIDE>
2200               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2201   "TARGET_SIMD"
2202   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2203   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; D-register widening MLS by a duplicated scalar (lane 0).
2206 (define_insn "aarch64_<su>mlsl_n<mode>"
2207   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2208         (minus:<VWIDE>
2209           (match_operand:<VWIDE> 1 "register_operand" "0")
2210           (mult:<VWIDE>
2211             (ANY_EXTEND:<VWIDE>
2212               (match_operand:VD_HSI 2 "register_operand" "w"))
2213             (vec_duplicate:<VWIDE>
2214               (ANY_EXTEND:<VWIDE_S>
2215                 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2216   "TARGET_SIMD"
2217   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2218   [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of the low halves (SMULL/UMULL).
2221 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2222  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2223        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2224                            (match_operand:VQW 1 "register_operand" "w")
2225                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2226                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2227                            (match_operand:VQW 2 "register_operand" "w")
2228                            (match_dup 3)))))]
2229   "TARGET_SIMD"
2230   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2231   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Widening multiply of whole D-register vectors (intrinsics form).
2234 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2235   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2236         (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2237                          (match_operand:VD_BHSI 1 "register_operand" "w"))
2238                       (ANY_EXTEND:<VWIDE>
2239                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2240   "TARGET_SIMD"
2241   "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2242   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: lo-half widening multiply.
2245 (define_expand "vec_widen_<su>mult_lo_<mode>"
2246   [(match_operand:<VWIDE> 0 "register_operand")
2247    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2248    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2249  "TARGET_SIMD"
2251    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2252    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2253                                                        operands[1],
2254                                                        operands[2], p));
2255    DONE;
;; Widening multiply of the high halves (SMULL2/UMULL2).
2259 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2260  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2261       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2262                             (match_operand:VQW 1 "register_operand" "w")
2263                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2264                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2265                             (match_operand:VQW 2 "register_operand" "w")
2266                             (match_dup 3)))))]
2267   "TARGET_SIMD"
2268   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2269   [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Standard-name expander: hi-half widening multiply.
2272 (define_expand "vec_widen_<su>mult_hi_<mode>"
2273   [(match_operand:<VWIDE> 0 "register_operand")
2274    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2275    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2276  "TARGET_SIMD"
2278    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2279    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2280                                                        operands[1],
2281                                                        operands[2], p));
2282    DONE;
2287 ;; vmull_lane_s16 intrinsics
;; Widening multiply by one lane of a vector (SMULL/UMULL by element).
;; The lane index is remapped for endianness at output time.
2288 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2289   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2290         (mult:<VWIDE>
2291           (ANY_EXTEND:<VWIDE>
2292             (match_operand:<VCOND> 1 "register_operand" "w"))
2293           (vec_duplicate:<VWIDE>
2294             (ANY_EXTEND:<VWIDE_S>
2295               (vec_select:<VEL>
2296                 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2297                 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2298   "TARGET_SIMD"
2299   {
2300     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2301     return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2302   }
2303   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; Hi-half widening multiply by a lane of a 64-bit vector (SMULL2/UMULL2
;; by element).
2306 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2307   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2308         (mult:<VWIDE>
2309           (ANY_EXTEND:<VWIDE>
2310             (vec_select:<VHALF>
2311               (match_operand:VQ_HSI 1 "register_operand" "w")
2312               (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2313           (vec_duplicate:<VWIDE>
2314             (ANY_EXTEND:<VWIDE_S>
2315               (vec_select:<VEL>
2316                 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2317                 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2318   "TARGET_SIMD"
2319   {
2320     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2321     return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2322   }
2323   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; Intrinsic expander for the hi-half mull-by-lane: supplies the hi-half
;; selector and defers to the insn above.
2326 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2327   [(match_operand:<VWIDE> 0 "register_operand")
2328    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2329    (match_operand:<VCOND> 2 "register_operand")
2330    (match_operand:SI 3 "immediate_operand")]
2331   "TARGET_SIMD"
2333   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2334   emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2335              operands[1], p, operands[2], operands[3]));
2336   DONE;
;; As aarch64_<su>mull_hi_lane<mode>_insn, but the lane operand comes from
;; a full 128-bit vector (<VCONQ>) rather than a 64-bit one.
2340 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2341   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2342         (mult:<VWIDE>
2343           (ANY_EXTEND:<VWIDE>
2344             (vec_select:<VHALF>
2345               (match_operand:VQ_HSI 1 "register_operand" "w")
2346               (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2347           (vec_duplicate:<VWIDE>
2348             (ANY_EXTEND:<VWIDE_S>
2349               (vec_select:<VEL>
2350                 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2351                 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2352   "TARGET_SIMD"
2353   {
2354     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2355     return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2356   }
2357   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; Expander wrapping the laneq insn above; builds the hi-half parallel.
2360 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2361   [(match_operand:<VWIDE> 0 "register_operand")
2362    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2363    (match_operand:<VCONQ> 2 "register_operand")
2364    (match_operand:SI 3 "immediate_operand")]
2365   "TARGET_SIMD"
2367   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2368   emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2369              operands[1], p, operands[2], operands[3]));
2370   DONE;
;; Widening multiply by a duplicated scalar: {S,U}MULL against element 0
;; of the scalar register holding operand 2.
2374 (define_insn "aarch64_<su>mull_n<mode>"
2375   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2376         (mult:<VWIDE>
2377           (ANY_EXTEND:<VWIDE>
2378             (match_operand:VD_HSI 1 "register_operand" "w"))
2379           (vec_duplicate:<VWIDE>
2380             (ANY_EXTEND:<VWIDE_S>
2381               (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2382   "TARGET_SIMD"
2383   "<su>mull\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
2384   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; {S,U}MULL2 by a duplicated scalar: high half of operand 1 times
;; element 0 of operand 2.
2387 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2388   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2389         (mult:<VWIDE>
2390           (ANY_EXTEND:<VWIDE>
2391             (vec_select:<VHALF>
2392               (match_operand:VQ_HSI 1 "register_operand" "w")
2393               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2394           (vec_duplicate:<VWIDE>
2395             (ANY_EXTEND:<VWIDE_S>
2396               (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2397   "TARGET_SIMD"
2398   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2399   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
;; Expander wrapping the insn above; builds the hi-half parallel.
2402 (define_expand "aarch64_<su>mull_hi_n<mode>"
2403   [(match_operand:<VWIDE> 0 "register_operand")
2404    (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2405    (match_operand:<VEL> 2 "register_operand")]
2406  "TARGET_SIMD"
2408    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2409    emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2410                                                     operands[2], p));
2411    DONE;
2415 ;; vmlal_lane_s16 intrinsics
;; Widening multiply-accumulate by lane: {S,U}MLAL acc (operand 1, tied to
;; the output) += extend(operand 2) * dup(lane %4 of operand 3).
2416 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2417   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2418         (plus:<VWIDE>
2419           (mult:<VWIDE>
2420             (ANY_EXTEND:<VWIDE>
2421               (match_operand:<VCOND> 2 "register_operand" "w"))
2422             (vec_duplicate:<VWIDE>
2423               (ANY_EXTEND:<VWIDE_S>
2424                 (vec_select:<VEL>
2425                   (match_operand:VDQHS 3 "register_operand" "<vwx>")
2426                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2427           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2428   "TARGET_SIMD"
2429   {
2430     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2431     return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2432   }
2433   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; {S,U}MLAL2 by lane: accumulate the high half of operand 2 times lane %5
;; of the 64-bit vector in operand 4.
2436 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2437   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2438         (plus:<VWIDE>
2439           (mult:<VWIDE>
2440             (ANY_EXTEND:<VWIDE>
2441               (vec_select:<VHALF>
2442                 (match_operand:VQ_HSI 2 "register_operand" "w")
2443                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2444             (vec_duplicate:<VWIDE>
2445               (ANY_EXTEND:<VWIDE_S>
2446                 (vec_select:<VEL>
2447                   (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2448                   (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2449           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2450   "TARGET_SIMD"
2451   {
2452     operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2453     return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2454   }
2455   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander wrapping the insn above; builds the hi-half parallel.
2458 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2459   [(match_operand:<VWIDE> 0 "register_operand")
2460    (match_operand:<VWIDE> 1 "register_operand")
2461    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2462    (match_operand:<VCOND> 3 "register_operand")
2463    (match_operand:SI 4 "immediate_operand")]
2464   "TARGET_SIMD"
2466   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2467   emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2468              operands[1], operands[2], p, operands[3], operands[4]));
2469   DONE;
;; As aarch64_<su>mlal_hi_lane<mode>_insn, but the lane operand comes
;; from a full 128-bit vector (<VCONQ>).
2473 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2474   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2475         (plus:<VWIDE>
2476           (mult:<VWIDE>
2477             (ANY_EXTEND:<VWIDE>
2478               (vec_select:<VHALF>
2479                 (match_operand:VQ_HSI 2 "register_operand" "w")
2480                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2481             (vec_duplicate:<VWIDE>
2482               (ANY_EXTEND:<VWIDE_S>
2483                 (vec_select:<VEL>
2484                   (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2485                   (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2486           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2487   "TARGET_SIMD"
2488   {
2489     operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2490     return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2491   }
2492   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander wrapping the laneq insn above; builds the hi-half parallel.
2495 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2496   [(match_operand:<VWIDE> 0 "register_operand")
2497    (match_operand:<VWIDE> 1 "register_operand")
2498    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2499    (match_operand:<VCONQ> 3 "register_operand")
2500    (match_operand:SI 4 "immediate_operand")]
2501   "TARGET_SIMD"
2503   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2504   emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2505              operands[1], operands[2], p, operands[3], operands[4]));
2506   DONE;
;; Widening multiply-subtract by lane: {S,U}MLSL acc (operand 1, tied to
;; the output) -= extend(operand 2) * dup(lane %4 of operand 3).
2510 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2511   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2512    (minus:<VWIDE>
2513      (match_operand:<VWIDE> 1 "register_operand" "0")
2514      (mult:<VWIDE>
2515        (ANY_EXTEND:<VWIDE>
2516          (match_operand:<VCOND> 2 "register_operand" "w"))
2517        (vec_duplicate:<VWIDE>
2518          (ANY_EXTEND:<VWIDE_S>
2519            (vec_select:<VEL>
2520              (match_operand:VDQHS 3 "register_operand" "<vwx>")
2521              (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2522   "TARGET_SIMD"
2523   {
2524     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2525     return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2526   }
2527   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; {S,U}MLSL2 by lane: subtract the product of the high half of operand 2
;; and lane %5 of the 64-bit vector in operand 4 from the accumulator.
2530 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2531   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2532         (minus:<VWIDE>
2533           (match_operand:<VWIDE> 1 "register_operand" "0")
2534           (mult:<VWIDE>
2535             (ANY_EXTEND:<VWIDE>
2536               (vec_select:<VHALF>
2537                 (match_operand:VQ_HSI 2 "register_operand" "w")
2538                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2539             (vec_duplicate:<VWIDE>
2540               (ANY_EXTEND:<VWIDE_S>
2541                 (vec_select:<VEL>
2542                   (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2543                   (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2544           )))]
2545   "TARGET_SIMD"
2546   {
2547     operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2548     return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2549   }
2550   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander wrapping the insn above; builds the hi-half parallel.
2553 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2554   [(match_operand:<VWIDE> 0 "register_operand")
2555    (match_operand:<VWIDE> 1 "register_operand")
2556    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2557    (match_operand:<VCOND> 3 "register_operand")
2558    (match_operand:SI 4 "immediate_operand")]
2559   "TARGET_SIMD"
2561   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2562   emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2563              operands[1], operands[2], p, operands[3], operands[4]));
2564   DONE;
;; As aarch64_<su>mlsl_hi_lane<mode>_insn, but the lane operand comes
;; from a full 128-bit vector (<VCONQ>).
2568 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2569   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2570         (minus:<VWIDE>
2571           (match_operand:<VWIDE> 1 "register_operand" "0")
2572           (mult:<VWIDE>
2573             (ANY_EXTEND:<VWIDE>
2574               (vec_select:<VHALF>
2575                 (match_operand:VQ_HSI 2 "register_operand" "w")
2576                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2577             (vec_duplicate:<VWIDE>
2578               (ANY_EXTEND:<VWIDE_S>
2579                 (vec_select:<VEL>
2580                   (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2581                   (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2582           )))]
2583   "TARGET_SIMD"
2584   {
2585     operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2586     return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2587   }
2588   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
;; Expander wrapping the laneq insn above; builds the hi-half parallel.
2591 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2592   [(match_operand:<VWIDE> 0 "register_operand")
2593    (match_operand:<VWIDE> 1 "register_operand")
2594    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2595    (match_operand:<VCONQ> 3 "register_operand")
2596    (match_operand:SI 4 "immediate_operand")]
2597   "TARGET_SIMD"
2599   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2600   emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2601              operands[1], operands[2], p, operands[3], operands[4]));
2602   DONE;
2606 ;; FP vector operations.
2607 ;; AArch64 AdvSIMD supports single-precision (32-bit) and 
2608 ;; double-precision (64-bit) floating-point data types and arithmetic as
2609 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable 
2610 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2612 ;; Floating-point operations can raise an exception.  Vectorizing such
2613 ;; operations is safe for the reasons explained below.
2615 ;; ARMv8 permits an extension to enable trapped floating-point
2616 ;; exception handling, however this is an optional feature.  In the
2617 ;; event of a floating-point exception being raised by vectorised
2618 ;; code then:
2619 ;; 1.  If trapped floating-point exceptions are available, then a trap
2620 ;;     will be taken when any lane raises an enabled exception.  A trap
2621 ;;     handler may determine which lane raised the exception.
2622 ;; 2.  Alternatively a sticky exception flag is set in the
2623 ;;     floating-point status register (FPSR).  Software may explicitly
2624 ;;     test the exception flags, in which case the tests will either
2625 ;;     prevent vectorisation, allowing precise identification of the
2626 ;;     failing operation, or if tested outside of vectorisable regions
2627 ;;     then the specific operation and lane are not of interest.
2629 ;; FP arithmetic operations.
;; Vector FP addition; the <vczle>/<vczbe> substs also generate variants
;; that model the implicit zeroing of the high half for 64-bit operations.
2631 (define_insn "add<mode>3<vczle><vczbe>"
2632  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2633        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2634                    (match_operand:VHSDF 2 "register_operand" "w")))]
2635  "TARGET_SIMD"
2636  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2637   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP subtraction.
2640 (define_insn "sub<mode>3<vczle><vczbe>"
2641  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2642        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2643                     (match_operand:VHSDF 2 "register_operand" "w")))]
2644  "TARGET_SIMD"
2645  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2646   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
;; Vector FP multiplication.
2649 (define_insn "mul<mode>3<vczle><vczbe>"
2650  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2651        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2652                    (match_operand:VHSDF 2 "register_operand" "w")))]
2653  "TARGET_SIMD"
2654  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2655   [(set_attr "type" "neon_fp_mul_<stype><q>")]
;; Vector FP division.  Tries the approximate-division sequence first
;; (aarch64_emit_approx_div); otherwise falls through to the FDIV insn.
2658 (define_expand "div<mode>3"
2659  [(set (match_operand:VHSDF 0 "register_operand")
2660        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2661                   (match_operand:VHSDF 2 "register_operand")))]
2662  "TARGET_SIMD"
2664   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2665     DONE;
2667   operands[1] = force_reg (<MODE>mode, operands[1]);
;; The FDIV insn matched when the approximate sequence was not used.
2670 (define_insn "*div<mode>3<vczle><vczbe>"
2671  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2672        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2673                  (match_operand:VHSDF 2 "register_operand" "w")))]
2674  "TARGET_SIMD"
2675  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2676   [(set_attr "type" "neon_fp_div_<stype><q>")]
;; Vector FP negation.
2679 (define_insn "neg<mode>2<vczle><vczbe>"
2680  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2681        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2682  "TARGET_SIMD"
2683  "fneg\\t%0.<Vtype>, %1.<Vtype>"
2684   [(set_attr "type" "neon_fp_neg_<stype><q>")]
;; FNEG on V2DI bit patterns, kept as an unspec since there is no V2DF
;; negation expressed on integer-mode operands.
2687 (define_insn "aarch64_fnegv2di2<vczle><vczbe>"
2688  [(set (match_operand:V2DI 0 "register_operand" "=w")
2689        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
2690                       UNSPEC_FNEG))]
2691  "TARGET_SIMD"
2692  "fneg\\t%0.2d, %1.2d"
2693   [(set_attr "type" "neon_fp_neg_d")]
;; Vector FP absolute value.
2696 (define_insn "abs<mode>2<vczle><vczbe>"
2697  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2698        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2699  "TARGET_SIMD"
2700  "fabs\\t%0.<Vtype>, %1.<Vtype>"
2701   [(set_attr "type" "neon_fp_abs_<stype><q>")]
;; Unfused FP multiply-accumulate for the vmla intrinsics: deliberately
;; expanded as separate FMUL then FADD (not FMLA), preserving the
;; intrinsic's two-rounding semantics.
2704 (define_expand "aarch64_float_mla<mode>"
2705   [(set (match_operand:VDQF_DF 0 "register_operand")
2706         (plus:VDQF_DF
2707           (mult:VDQF_DF
2708             (match_operand:VDQF_DF 2 "register_operand")
2709             (match_operand:VDQF_DF 3 "register_operand"))
2710           (match_operand:VDQF_DF 1 "register_operand")))]
2711   "TARGET_SIMD"
2712   {
2713     rtx scratch = gen_reg_rtx (<MODE>mode);
2714     emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2715     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2716     DONE;
2717   }
;; Unfused FP multiply-subtract: separate FMUL then FSUB.
2720 (define_expand "aarch64_float_mls<mode>"
2721   [(set (match_operand:VDQF_DF 0 "register_operand")
2722         (minus:VDQF_DF
2723           (match_operand:VDQF_DF 1 "register_operand")
2724           (mult:VDQF_DF
2725             (match_operand:VDQF_DF 2 "register_operand")
2726             (match_operand:VDQF_DF 3 "register_operand"))))]
2727   "TARGET_SIMD"
2728   {
2729     rtx scratch = gen_reg_rtx (<MODE>mode);
2730     emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2731     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2732     DONE;
2733   }
;; Unfused multiply-accumulate with a duplicated scalar multiplier.
2736 (define_expand "aarch64_float_mla_n<mode>"
2737   [(set (match_operand:VDQSF 0 "register_operand")
2738         (plus:VDQSF
2739           (mult:VDQSF
2740             (vec_duplicate:VDQSF
2741               (match_operand:<VEL> 3 "register_operand"))
2742             (match_operand:VDQSF 2 "register_operand"))
2743           (match_operand:VDQSF 1 "register_operand")))]
2744   "TARGET_SIMD"
2745   {
2746     rtx scratch = gen_reg_rtx (<MODE>mode);
2747     emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2748     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2749     DONE;
2750   }
;; Unfused multiply-subtract with a duplicated scalar multiplier.
2753 (define_expand "aarch64_float_mls_n<mode>"
2754   [(set (match_operand:VDQSF 0 "register_operand")
2755         (minus:VDQSF
2756           (match_operand:VDQSF 1 "register_operand")
2757           (mult:VDQSF
2758             (vec_duplicate:VDQSF
2759               (match_operand:<VEL> 3 "register_operand"))
2760             (match_operand:VDQSF 2 "register_operand"))))]
2761   "TARGET_SIMD"
2762   {
2763     rtx scratch = gen_reg_rtx (<MODE>mode);
2764     emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2765     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2766     DONE;
2767   }
;; Unfused multiply-accumulate by a lane of a 64-bit V2SF vector
;; (separate mul_lane then add).
2770 (define_expand "aarch64_float_mla_lane<mode>"
2771   [(set (match_operand:VDQSF 0 "register_operand")
2772         (plus:VDQSF
2773           (mult:VDQSF
2774             (vec_duplicate:VDQSF
2775               (vec_select:<VEL>
2776                 (match_operand:V2SF 3 "register_operand")
2777                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2778             (match_operand:VDQSF 2 "register_operand"))
2779           (match_operand:VDQSF 1 "register_operand")))]
2780   "TARGET_SIMD"
2781   {
2782     rtx scratch = gen_reg_rtx (<MODE>mode);
2783     emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2784                                     operands[3], operands[4]));
2785     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2786     DONE;
2787   }
;; Unfused multiply-subtract by a lane of a 64-bit V2SF vector.
2790 (define_expand "aarch64_float_mls_lane<mode>"
2791   [(set (match_operand:VDQSF 0 "register_operand")
2792         (minus:VDQSF
2793           (match_operand:VDQSF 1 "register_operand")
2794           (mult:VDQSF
2795             (vec_duplicate:VDQSF
2796               (vec_select:<VEL>
2797                 (match_operand:V2SF 3 "register_operand")
2798                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2799             (match_operand:VDQSF 2 "register_operand"))))]
2800   "TARGET_SIMD"
2801   {
2802     rtx scratch = gen_reg_rtx (<MODE>mode);
2803     emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2804                                     operands[3], operands[4]));
2805     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2806     DONE;
2807   }
;; Unfused multiply-accumulate by a lane of a 128-bit V4SF vector.
2810 (define_expand "aarch64_float_mla_laneq<mode>"
2811   [(set (match_operand:VDQSF 0 "register_operand")
2812         (plus:VDQSF
2813           (mult:VDQSF
2814             (vec_duplicate:VDQSF
2815               (vec_select:<VEL>
2816                 (match_operand:V4SF 3 "register_operand")
2817                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2818             (match_operand:VDQSF 2 "register_operand"))
2819           (match_operand:VDQSF 1 "register_operand")))]
2820   "TARGET_SIMD"
2821   {
2822     rtx scratch = gen_reg_rtx (<MODE>mode);
2823     emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2824                                      operands[3], operands[4]));
2825     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2826     DONE;
2827   }
;; Unfused multiply-subtract by a lane of a 128-bit V4SF vector.
2830 (define_expand "aarch64_float_mls_laneq<mode>"
2831   [(set (match_operand:VDQSF 0 "register_operand")
2832         (minus:VDQSF
2833           (match_operand:VDQSF 1 "register_operand")
2834           (mult:VDQSF
2835             (vec_duplicate:VDQSF
2836               (vec_select:<VEL>
2837                 (match_operand:V4SF 3 "register_operand")
2838                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2839             (match_operand:VDQSF 2 "register_operand"))))]
2840   "TARGET_SIMD"
2841   {
2842     rtx scratch = gen_reg_rtx (<MODE>mode);
2843     emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2844                                      operands[3], operands[4]));
2845     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2846     DONE;
2847   }
;; Fused multiply-add (single rounding): standard fma pattern mapping
;; to FMLA, with the accumulator tied to the destination.
2850 (define_insn "fma<mode>4<vczle><vczbe>"
2851   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2852        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2853                   (match_operand:VHSDF 2 "register_operand" "w")
2854                   (match_operand:VHSDF 3 "register_operand" "0")))]
2855   "TARGET_SIMD"
2856  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2857   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLA with one multiplicand taken from a lane of a same-width vector.
2860 (define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
2861   [(set (match_operand:VDQF 0 "register_operand" "=w")
2862     (fma:VDQF
2863       (vec_duplicate:VDQF
2864         (vec_select:<VEL>
2865           (match_operand:VDQF 1 "register_operand" "<h_con>")
2866           (parallel [(match_operand:SI 2 "immediate_operand")])))
2867       (match_operand:VDQF 3 "register_operand" "w")
2868       (match_operand:VDQF 4 "register_operand" "0")))]
2869   "TARGET_SIMD"
2870   {
2871     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2872     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2873   }
2874   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA by a lane of a vector of the other (swapped) width.
2877 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2878   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2879     (fma:VDQSF
2880       (vec_duplicate:VDQSF
2881         (vec_select:<VEL>
2882           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2883           (parallel [(match_operand:SI 2 "immediate_operand")])))
2884       (match_operand:VDQSF 3 "register_operand" "w")
2885       (match_operand:VDQSF 4 "register_operand" "0")))]
2886   "TARGET_SIMD"
2887   {
2888     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2889     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2890   }
2891   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLA with a scalar multiplicand broadcast via vec_duplicate; uses
;; element [0] of the scalar register.
2894 (define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
2895   [(set (match_operand:VMUL 0 "register_operand" "=w")
2896     (fma:VMUL
2897       (vec_duplicate:VMUL
2898           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2899       (match_operand:VMUL 2 "register_operand" "w")
2900       (match_operand:VMUL 3 "register_operand" "0")))]
2901   "TARGET_SIMD"
2902   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2903   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fma where one multiplicand is a selected lane of a V2DF
;; vector (the .2d lane form of FMLA with a scalar result).
2906 (define_insn "*aarch64_fma4_elt_to_64v2df"
2907   [(set (match_operand:DF 0 "register_operand" "=w")
2908     (fma:DF
2909         (vec_select:DF
2910           (match_operand:V2DF 1 "register_operand" "w")
2911           (parallel [(match_operand:SI 2 "immediate_operand")]))
2912       (match_operand:DF 3 "register_operand" "w")
2913       (match_operand:DF 4 "register_operand" "0")))]
2914   "TARGET_SIMD"
2915   {
2916     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2917     return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2918   }
2919   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Fused negate-multiply-add (fma with a negated multiplicand),
;; mapping to FMLS.
2922 (define_insn "fnma<mode>4<vczle><vczbe>"
2923   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2924         (fma:VHSDF
2925           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2926           (match_operand:VHSDF 2 "register_operand" "w")
2927           (match_operand:VHSDF 3 "register_operand" "0")))]
2928   "TARGET_SIMD"
2929   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2930   [(set_attr "type" "neon_fp_mla_<stype><q>")]
;; FMLS with the lane multiplicand taken from a same-width vector.
2933 (define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
2934   [(set (match_operand:VDQF 0 "register_operand" "=w")
2935     (fma:VDQF
2936       (neg:VDQF
2937         (match_operand:VDQF 3 "register_operand" "w"))
2938       (vec_duplicate:VDQF
2939         (vec_select:<VEL>
2940           (match_operand:VDQF 1 "register_operand" "<h_con>")
2941           (parallel [(match_operand:SI 2 "immediate_operand")])))
2942       (match_operand:VDQF 4 "register_operand" "0")))]
2943   "TARGET_SIMD"
2944   {
2945     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2946     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2947   }
2948   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS by a lane of a vector of the other (swapped) width.
2951 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2952   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2953     (fma:VDQSF
2954       (neg:VDQSF
2955         (match_operand:VDQSF 3 "register_operand" "w"))
2956       (vec_duplicate:VDQSF
2957         (vec_select:<VEL>
2958           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2959           (parallel [(match_operand:SI 2 "immediate_operand")])))
2960       (match_operand:VDQSF 4 "register_operand" "0")))]
2961   "TARGET_SIMD"
2962   {
2963     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2964     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2965   }
2966   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; FMLS with a scalar multiplicand broadcast via vec_duplicate;
;; uses element [0] of the scalar register.
2969 (define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
2970   [(set (match_operand:VMUL 0 "register_operand" "=w")
2971     (fma:VMUL
2972       (neg:VMUL
2973         (match_operand:VMUL 2 "register_operand" "w"))
2974       (vec_duplicate:VMUL
2975         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2976       (match_operand:VMUL 3 "register_operand" "0")))]
2977   "TARGET_SIMD"
2978   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2979   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
;; Scalar DF fnma with the lane multiplicand selected from a V2DF vector;
;; the negation is on the non-lane multiplicand.
2982 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2983   [(set (match_operand:DF 0 "register_operand" "=w")
2984     (fma:DF
2985       (vec_select:DF
2986         (match_operand:V2DF 1 "register_operand" "w")
2987         (parallel [(match_operand:SI 2 "immediate_operand")]))
2988       (neg:DF
2989         (match_operand:DF 3 "register_operand" "w"))
2990       (match_operand:DF 4 "register_operand" "0")))]
2991   "TARGET_SIMD"
2992   {
2993     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2994     return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2995   }
2996   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2999 ;; Vector versions of the floating-point frint patterns.
3000 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; Vector FRINT* rounding, selected by the FRINT unspec iterator
;; (suffix supplied by <frint_suffix>).
3001 (define_insn "<frint_pattern><mode>2<vczle><vczbe>"
3002   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3003         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3004                        FRINT))]
3005   "TARGET_SIMD"
3006   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
3007   [(set_attr "type" "neon_fp_round_<stype><q>")]
3010 ;; Vector versions of the fcvt standard patterns.
3011 ;; Expands to lbtrunc, lround, lceil, lfloor
;; FP-to-integer conversion combining an FRINT-style rounding (FCVT
;; unspec) with a signed/unsigned fix (FIXUORS).
3012 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
3013   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3014         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3015                                [(match_operand:VHSDF 1 "register_operand" "w")]
3016                                FCVT)))]
3017   "TARGET_SIMD"
3018   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
3019   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
3022 ;; HF Scalar variants of related SIMD instructions.
;; Scalar HF->HI conversion with explicit rounding mode; needs the
;; ARMv8.2-A FP16 scalar instructions (TARGET_SIMD_F16INST).
3023 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3024   [(set (match_operand:HI 0 "register_operand" "=w")
3025         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3026                       FCVT)))]
3027   "TARGET_SIMD_F16INST"
3028   "fcvt<frint_suffix><su>\t%h0, %h1"
3029   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HF->HI truncating conversion (FCVTZ{S,U}).
3032 (define_insn "<optab>_trunchfhi2"
3033   [(set (match_operand:HI 0 "register_operand" "=w")
3034         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3035   "TARGET_SIMD_F16INST"
3036   "fcvtz<su>\t%h0, %h1"
3037   [(set_attr "type" "neon_fp_to_int_s")]
;; Scalar HI->HF conversion ({S,U}CVTF).
3040 (define_insn "<optab>hihf2"
3041   [(set (match_operand:HF 0 "register_operand" "=w")
3042         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3043   "TARGET_SIMD_F16INST"
3044   "<su_optab>cvtf\t%h0, %h1"
3045   [(set_attr "type" "neon_int_to_fp_s")]
;; Combine a multiply by a power-of-two constant with a truncating
;; FP-to-int conversion into a single fixed-point FCVTZ{S,U} with an
;; immediate #fbits, when the constant's log2 fits the element width.
3048 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3049   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3050         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3051                                [(mult:VDQF
3052          (match_operand:VDQF 1 "register_operand" "w")
3053          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3054                                UNSPEC_FRINTZ)))]
3055   "TARGET_SIMD
3056    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3057                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3058   {
3059     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3060     char buf[64];
3061     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3062     output_asm_insn (buf, operands);
3063     return "";
3064   }
3065   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard fix/fixuns pattern: truncating FP-to-int conversion.
3068 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3069   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3070         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3071                                [(match_operand:VHSDF 1 "register_operand")]
3072                                 UNSPEC_FRINTZ)))]
3073   "TARGET_SIMD"
3074   {})
;; Same RTL shape under the fix_trunc optab name.
3076 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3077   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3078         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3079                                [(match_operand:VHSDF 1 "register_operand")]
3080                                 UNSPEC_FRINTZ)))]
3081   "TARGET_SIMD"
3082   {})
;; Round-toward-zero (FRINTZ) under the ftrunc standard name.
3084 (define_expand "ftrunc<VHSDF:mode>2"
3085   [(set (match_operand:VHSDF 0 "register_operand")
3086         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3087                        UNSPEC_FRINTZ))]
3088   "TARGET_SIMD"
3089   {})
;; Vector int-to-FP conversion ({S,U}CVTF).
3091 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3092   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3093         (FLOATUORS:VHSDF
3094           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3095   "TARGET_SIMD"
3096   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3097   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3100 ;; Conversions between vectors of floats and doubles.
3101 ;; Contains a mix of patterns to match standard pattern names
3102 ;; and those for intrinsics.
3104 ;; Float widening operations.
;; Widen the low-numbered half of a 128-bit float vector (FCVTL reads the
;; low architectural half).  Operand 2 is a PARALLEL selecting that half,
;; enforced by the vect_par_cnst_lo_half predicate.
3106 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3107   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3108         (float_extend:<VWIDE> (vec_select:<VHALF>
3109                                (match_operand:VQ_HSF 1 "register_operand" "w")
3110                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3111                             )))]
3112   "TARGET_SIMD"
3113   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3114   [(set_attr "type" "neon_fp_cvt_widen_s")]
3117 ;; Convert between fixed-point and floating-point (vector modes)
;; Float -> fixed-point: FCVTZ[SU] with a #fbits immediate (operand 2).
3119 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3120   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3121         (unspec:<VHSDF:FCVT_TARGET>
3122           [(match_operand:VHSDF 1 "register_operand" "w")
3123            (match_operand:SI 2 "immediate_operand" "i")]
3124          FCVT_F2FIXED))]
3125   "TARGET_SIMD"
3126   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3127   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
;; Fixed-point -> float: [SU]CVTF with a #fbits immediate (operand 2).
3130 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3131   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3132         (unspec:<VDQ_HSDI:FCVT_TARGET>
3133           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3134            (match_operand:SI 2 "immediate_operand" "i")]
3135          FCVT_FIXED2F))]
3136   "TARGET_SIMD"
3137   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3138   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3141 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3142 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3143 ;; the meaning of HI and LO changes depending on the target endianness.
3144 ;; While elsewhere we map the higher numbered elements of a vector to
3145 ;; the lower architectural lanes of the vector, for these patterns we want
3146 ;; to always treat "hi" as referring to the higher architectural lanes.
3147 ;; Consequently, while the patterns below look inconsistent with our
3148 ;; other big-endian patterns their behavior is as required.
;; Standard vec_unpacks_lo expander: build the lo-half PARALLEL (false ->
;; low half) and hand it to the FCVTL insn above.
3150 (define_expand "vec_unpacks_lo_<mode>"
3151   [(match_operand:<VWIDE> 0 "register_operand")
3152    (match_operand:VQ_HSF 1 "register_operand")]
3153   "TARGET_SIMD"
3154   {
3155     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3156     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3157                                                        operands[1], p));
3158     DONE;
3159   }
;; Widen the high-numbered half: FCVTL2 reads the high architectural half,
;; so operand 2 must be the hi-half PARALLEL (vect_par_cnst_hi_half).
3162 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3163   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3164         (float_extend:<VWIDE> (vec_select:<VHALF>
3165                                (match_operand:VQ_HSF 1 "register_operand" "w")
3166                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3167                             )))]
3168   "TARGET_SIMD"
3169   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3170   [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Standard vec_unpacks_hi expander: build the hi-half PARALLEL (true ->
;; high half) and emit the FCVTL2 insn.  This must generate the *_hi_
;; pattern: the lo insn's operand 2 predicate is vect_par_cnst_lo_half,
;; so a hi-half PARALLEL would not be recognized by it, and the lo insn
;; would convert the wrong half in any case.
3173 (define_expand "vec_unpacks_hi_<mode>"
3174   [(match_operand:<VWIDE> 0 "register_operand")
3175    (match_operand:VQ_HSF 1 "register_operand")]
3176   "TARGET_SIMD"
3177   {
3178     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179     emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
3180                                                        operands[1], p));
3181     DONE;
3182   }
;; Scalar-vector widening: V2SF/V4HF-style 64-bit float vector to its
;; double-width form via FCVTL.
3184 (define_insn "extend<mode><Vwide>2"
3185   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3186         (float_extend:<VWIDE>
3187           (match_operand:VDF 1 "register_operand" "w")))]
3188   "TARGET_SIMD"
3189   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3190   [(set_attr "type" "neon_fp_cvt_widen_s")]
3193 ;; Float narrowing operations.
;; FCVTXN: narrow DF -> SF with round-to-odd (kept as an unspec since RTL
;; has no round-to-odd operation).
3195 (define_insn "aarch64_float_trunc_rodd_df"
3196   [(set (match_operand:SF 0 "register_operand" "=w")
3197         (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3198                 UNSPEC_FCVTXN))]
3199   "TARGET_SIMD"
3200   "fcvtxn\\t%s0, %d1"
3201   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Vector FCVTXN writing the low half of the result register.
3204 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3205   [(set (match_operand:V2SF 0 "register_operand" "=w")
3206         (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3207                 UNSPEC_FCVTXN))]
3208   "TARGET_SIMD"
3209   "fcvtxn\\t%0.2s, %1.2d"
3210   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTXN2: narrow into the high half, keeping the low half (operand 1,
;; tied to the destination with constraint "0").  Little-endian operand
;; order in the vec_concat.
3213 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3214   [(set (match_operand:V4SF 0 "register_operand" "=w")
3215         (vec_concat:V4SF
3216           (match_operand:V2SF 1 "register_operand" "0")
3217           (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3218                 UNSPEC_FCVTXN)))]
3219   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3220   "fcvtxn2\\t%0.4s, %2.2d"
3221   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant: vec_concat operands swapped, same instruction.
3224 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3225   [(set (match_operand:V4SF 0 "register_operand" "=w")
3226         (vec_concat:V4SF
3227           (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3228                 UNSPEC_FCVTXN)
3229           (match_operand:V2SF 1 "register_operand" "0")))]
3230   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3231   "fcvtxn2\\t%0.4s, %2.2d"
3232   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Endian-dispatching expander for the FCVTXN2 intrinsic.
3235 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3236   [(match_operand:V4SF 0 "register_operand")
3237    (match_operand:V2SF 1 "register_operand")
3238    (match_operand:V2DF 2 "register_operand")]
3239   "TARGET_SIMD"
3241   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3242                              ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3243                              : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3244   emit_insn (gen (operands[0], operands[1], operands[2]));
3245   DONE;
;; Standard float_truncate narrowing (FCVTN); the <vczle><vczbe> substs add
;; the implicit-high-half-zeroing variants.
3249 (define_insn "trunc<Vwide><mode>2<vczle><vczbe>"
3250   [(set (match_operand:VDF 0 "register_operand" "=w")
3251       (float_truncate:VDF
3252         (match_operand:<VWIDE> 1 "register_operand" "w")))]
3253   "TARGET_SIMD"
3254   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3255   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; FCVTN2: narrow into the high half, low half preserved (tied operand 1).
3258 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3259   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3260     (vec_concat:<VDBL>
3261       (match_operand:VDF 1 "register_operand" "0")
3262       (float_truncate:VDF
3263         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3264   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3265   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3266   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Big-endian variant of the above (vec_concat operand order swapped).
3269 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3270   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3271     (vec_concat:<VDBL>
3272       (float_truncate:VDF
3273         (match_operand:<VWIDE> 2 "register_operand" "w"))
3274       (match_operand:VDF 1 "register_operand" "0")))]
3275   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3276   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3277   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Endian-dispatching expander for FCVTN2.
3280 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3281   [(match_operand:<VDBL> 0 "register_operand")
3282    (match_operand:VDF 1 "register_operand")
3283    (match_operand:<VWIDE> 2 "register_operand")]
3284   "TARGET_SIMD"
3286   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3287                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3288                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3289   emit_insn (gen (operands[0], operands[1], operands[2]));
3290   DONE;
;; vec_pack_trunc: narrow two V2DF into one V4SF.  FCVTN fills the low
;; half, FCVTN2 the high half; on big-endian the operands swap roles so
;; that register-lane numbering matches the expander's element order.
3294 (define_expand "vec_pack_trunc_v2df"
3295   [(set (match_operand:V4SF 0 "register_operand")
3296       (vec_concat:V4SF
3297         (float_truncate:V2SF
3298             (match_operand:V2DF 1 "register_operand"))
3299         (float_truncate:V2SF
3300             (match_operand:V2DF 2 "register_operand"))
3301           ))]
3302   "TARGET_SIMD"
3303   {
3304     rtx tmp = gen_reg_rtx (V2SFmode);
3305     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3306     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3308     emit_insn (gen_truncv2dfv2sf2 (tmp, operands[lo]));
3309     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3310                                                    tmp, operands[hi]));
3311     DONE;
3312   }
;; Scalar variant: concat the two DF scalars into a V2DF, then one FCVTN.
3315 (define_expand "vec_pack_trunc_df"
3316   [(set (match_operand:V2SF 0 "register_operand")
3317         (vec_concat:V2SF
3318           (float_truncate:SF (match_operand:DF 1 "general_operand"))
3319           (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
3320   "TARGET_SIMD"
3321   {
3322     rtx tmp = gen_reg_rtx (V2SFmode);
3323     emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
3324     emit_insn (gen_truncv2dfv2sf2 (operands[0], tmp));
3325     DONE;
3326   }
3329 ;; FP Max/Min
3330 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
3331 ;; expression like:
3332 ;;      a = (b < c) ? b : c;
3333 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3334 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3335 ;; -ffast-math.
3337 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3338 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3339 ;; operand will be returned when both operands are zero (i.e. they may not
3340 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
3341 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3342 ;; NaNs.
;; smax/smin standard names implemented with FMAXNM/FMINNM (safe given the
;; fast-math preconditions described above).
3344 (define_insn "<su><maxmin><mode>3"
3345   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3346         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3347                        (match_operand:VHSDF 2 "register_operand" "w")))]
3348   "TARGET_SIMD"
3349   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3350   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3353 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3354 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3355 ;; which implement the IEEE fmax ()/fmin () functions.
;; Kept as an unspec (FMAXMIN_UNS) because the IEEE NaN semantics of these
;; instructions are not expressible with plain smax/smin RTL.
3356 (define_insn "<fmaxmin><mode>3<vczle><vczbe>"
3357   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3358        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3359                       (match_operand:VHSDF 2 "register_operand" "w")]
3360                       FMAXMIN_UNS))]
3361   "TARGET_SIMD"
3362   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3363   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3366 ;; 'across lanes' add.
;; FADDP: pairwise add of the two input vectors; used below as the building
;; block for the V4SF reduction.
3368 (define_insn "aarch64_faddp<mode><vczle><vczbe>"
3369  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3370        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3371                       (match_operand:VHSDF 2 "register_operand" "w")]
3372         UNSPEC_FADDV))]
3373  "TARGET_SIMD"
3374  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3375   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
;; Integer add-across-lanes reduction to a scalar (ADDV / ADDP depending
;; on <vp>), result left in a SIMD register.
3378 (define_insn "reduc_plus_scal_<mode>"
3379  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3380        (unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
3381                     UNSPEC_ADDV))]
3382  "TARGET_SIMD"
3383  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3384   [(set_attr "type" "neon_reduc_add<q>")]
;; V2SI has no ADDV; a single pairwise ADDP of the vector with itself
;; leaves the sum in every lane, so lane 0 holds the reduction.
3387 (define_insn "reduc_plus_scal_v2si"
3388  [(set (match_operand:SI 0 "register_operand" "=w")
3389        (unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
3390                     UNSPEC_ADDV))]
3391  "TARGET_SIMD"
3392  "addp\\t%0.2s, %1.2s, %1.2s"
3393   [(set_attr "type" "neon_reduc_add")]
3396 ;; ADDV with result zero-extended to SI/DImode (for popcount).
;; ADDV writes the scalar into a SIMD register, implicitly zeroing the
;; upper bits, so the zero_extend comes for free.
3397 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3398  [(set (match_operand:GPI 0 "register_operand" "=w")
3399        (zero_extend:GPI
3400         (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3401                              UNSPEC_ADDV)))]
3402  "TARGET_SIMD"
3403  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3404   [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
;; Two-element float reduction: a single scalar FADDP.
3407 (define_insn "reduc_plus_scal_<mode>"
3408  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3409        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3410                    UNSPEC_FADDV))]
3411  "TARGET_SIMD"
3412  "faddp\\t%<Vetype>0, %1.<Vtype>"
3413   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; V4SF reduction: two rounds of vector FADDP leave the total in every
;; lane; extract lane 0 (endian-adjusted via aarch64_endian_lane_rtx).
3416 (define_expand "reduc_plus_scal_v4sf"
3417  [(set (match_operand:SF 0 "register_operand")
3418        (unspec:SF [(match_operand:V4SF 1 "register_operand")]
3419                     UNSPEC_FADDV))]
3420  "TARGET_SIMD"
3422   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3423   rtx scratch = gen_reg_rtx (V4SFmode);
3424   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3425   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3426   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
3427   DONE;
3430 ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
3431 ;; sign or zero-extends its elements.
3432 (define_insn "aarch64_<su>addlv<mode>"
3433  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3434        (unspec:<VWIDE_S>
3435          [(ANY_EXTEND:<V2XWIDE>
3436             (match_operand:VDQV_L 1 "register_operand" "w"))]
3437          UNSPEC_ADDV))]
3438  "TARGET_SIMD"
3439  "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3440   [(set_attr "type" "neon_reduc_add<q>")]
3443 ;; An ADDV over a vector PLUS of elements extracted and widened all from the
3444 ;; same vector is the same as an [SU]ADDLV above, so long as all the elements
3445 ;; of that vector are used.  We can greatly simplify the RTL expression using
3446 ;; this splitter.
;; The two vect_par_cnst_select_half PARALLELs must not overlap (checked by
;; aarch64_pars_overlap_p) so together they cover every element exactly once.
3447 (define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
3448  [(set (match_operand:<VWIDE_S> 0 "register_operand")
3449        (unspec:<VWIDE_S>
3450          [(plus:<VDBLW>
3451             (vec_select:<VDBLW>
3452               (ANY_EXTEND:<V2XWIDE>
3453                 (match_operand:VDQV_L 1 "register_operand"))
3454               (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3455             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3456               (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half")))]
3457          UNSPEC_ADDV))]
3458  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3459  "#"
3460  "&& 1"
3461   [(set (match_dup 0)
3462        (unspec:<VWIDE_S>
3463          [(ANY_EXTEND:<V2XWIDE>
3464             (match_dup 1))]
3465          UNSPEC_ADDV))]
3466   {}
3469 ;; Similar to the above but for two-step zero-widening reductions.
3470 ;; We can push the outer zero_extend outside the ADDV unspec and make
3471 ;; use of the implicit high-part zeroing semantics of UADDLV to do it all
3472 ;; in a single instruction.
3473 (define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
3474  [(set (match_operand:<VWIDE2X_S> 0 "register_operand" "=w")
3475        (unspec:<VWIDE2X_S>
3476          [(zero_extend:<VQUADW>
3477             (plus:<VDBLW>
3478               (vec_select:<VDBLW>
3479                 (zero_extend:<V2XWIDE>
3480                   (match_operand:VDQQH 1 "register_operand" "w"))
3481                 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3482               (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup 1))
3483                 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half"))))]
3484          UNSPEC_ADDV))]
3485  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3486  "#"
3487  "&& 1"
3488   [(set (match_dup 0)
3489         (zero_extend:<VWIDE2X_S>
3490           (unspec:<VWIDE_S>
3491             [(zero_extend:<V2XWIDE>
3492                (match_dup 1))]
3493             UNSPEC_ADDV)))]
3494   {}
3497 ;; Zero-extending version of the above.  As these intrinsics produce a scalar
3498 ;; value that may be used by further intrinsics we want to avoid moving the
3499 ;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.
;; Condition requires GPI strictly wider than the ADDLV result so the
;; zero_extend is non-trivial; the instruction's implicit zeroing of the
;; destination's upper bits provides it at no cost.
3501 (define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
3502  [(set (match_operand:GPI 0 "register_operand" "=w")
3503        (zero_extend:GPI
3504          (unspec:<VWIDE_S>
3505            [(ANY_EXTEND:<VDQV_L:V2XWIDE>
3506               (match_operand:VDQV_L 1 "register_operand" "w"))]
3507          UNSPEC_ADDV)))]
3508  "TARGET_SIMD
3509   && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
3510  "<su>addl<VDQV_L:vp>\\t%<VDQV_L:Vwstype>0<VDQV_L:Vwsuf>, %1.<VDQV_L:Vtype>"
3511   [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
;; [SU]ADDLP: pairwise add of widened adjacent elements.  The expander
;; materializes operands 2/3 as the even (0,2,4,...) and odd (1,3,5,...)
;; element-index PARALLELs for the insn below.
3514 (define_expand "@aarch64_<su>addlp<mode>"
3515   [(set (match_operand:<VDBLW> 0 "register_operand")
3516         (plus:<VDBLW>
3517           (vec_select:<VDBLW>
3518             (ANY_EXTEND:<V2XWIDE>
3519               (match_operand:VDQV_L 1 "register_operand"))
3520             (match_dup 2))
3521           (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3522             (match_dup 3))))]
3523  "TARGET_SIMD"
3525    int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
3526    operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
3527    operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
;; The insn accepts either even/odd assignment of the two PARALLELs; they
;; only need to differ (one even-half, one odd-half) to cover all pairs.
3531 (define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
3532   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3533         (plus:<VDBLW>
3534           (vec_select:<VDBLW>
3535             (ANY_EXTEND:<V2XWIDE>
3536               (match_operand:VDQV_L 1 "register_operand" "w"))
3537             (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half"))
3538           (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3539             (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))]
3540  "TARGET_SIMD
3541   && !rtx_equal_p (operands[2], operands[3])"
3542  "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3543   [(set_attr "type" "neon_reduc_add<q>")]
;; CLS: count leading sign bits, maps to the clrsb standard name.
3546 (define_insn "clrsb<mode>2<vczle><vczbe>"
3547   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3548         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3549   "TARGET_SIMD"
3550   "cls\\t%0.<Vtype>, %1.<Vtype>"
3551   [(set_attr "type" "neon_cls<q>")]
;; CLZ: count leading zeros per element.
3554 (define_insn "clz<mode>2<vczle><vczbe>"
3555  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3556        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3557  "TARGET_SIMD"
3558  "clz\\t%0.<Vtype>, %1.<Vtype>"
3559   [(set_attr "type" "neon_cls<q>")]
;; CNT: per-byte population count (byte vectors only; wider element popcount
;; is synthesized by the expander that follows).
3562 (define_insn "popcount<mode>2<vczle><vczbe>"
3563   [(set (match_operand:VB 0 "register_operand" "=w")
3564         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3565   "TARGET_SIMD"
3566   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3567   [(set_attr "type" "neon_cnt<q>")]
;; Popcount for 16/32/64-bit element vectors.  Strategy, in order of
;; preference: SVE CNT under a ptrue predicate; V1DI via the scalar DImode
;; popcount; otherwise a byte CNT followed by either a UDOT against an
;; all-ones vector (when available, for 32/64-bit elements) and/or a chain
;; of UADDLP widening steps until the element size matches.
3570 (define_expand "popcount<mode>2"
3571   [(set (match_operand:VDQHSD_V1DI 0 "register_operand")
3572         (popcount:VDQHSD_V1DI
3573           (match_operand:VDQHSD_V1DI 1 "register_operand")))]
3574   "TARGET_SIMD"
3575   {
3576     if (TARGET_SVE)
3577       {
3578         rtx p = aarch64_ptrue_reg (<VPRED>mode, <bitsize> == 64 ? 8 : 16);
3579         emit_insn (gen_aarch64_pred_popcount<mode> (operands[0],
3580                                                     p,
3581                                                     operands[1]));
3582         DONE;
3583       }
3585     if (<MODE>mode == V1DImode)
3586       {
3587         rtx out = gen_reg_rtx (DImode);
3588         emit_insn (gen_popcountdi2 (out, gen_lowpart (DImode, operands[1])));
3589         emit_move_insn (operands[0], gen_lowpart (<MODE>mode, out));
3590         DONE;
3591       }
3593     /* Generate a byte popcount.  */
3594     machine_mode mode = <bitsize> == 64 ? V8QImode : V16QImode;
3595     machine_mode mode2 = <bitsize> == 64 ? V2SImode : V4SImode;
3596     rtx tmp = gen_reg_rtx (mode);
3597     auto icode = optab_handler (popcount_optab, mode);
3598     emit_insn (GEN_FCN (icode) (tmp, gen_lowpart (mode, operands[1])));
3600     if (TARGET_DOTPROD
3601         && (<VEL>mode == SImode || <VEL>mode == DImode))
3602       {
3603         /* For V4SI and V2SI, we can generate a UDOT with a 0 accumulator and a
3604            1 multiplicand.  For V2DI, another UAADDLP is needed.  */
3605         rtx ones = force_reg (mode, CONST1_RTX (mode));
3606         auto icode = convert_optab_handler (udot_prod_optab, mode2, mode);
3607         mode = <bitsize> == 64 ? V2SImode : V4SImode;
3608         rtx dest = mode == <MODE>mode ? operands[0] : gen_reg_rtx (mode);
3609         rtx zeros = force_reg (mode, CONST0_RTX (mode));
3610         emit_insn (GEN_FCN (icode) (dest, tmp, ones, zeros));
3611         tmp = dest;
3612       }
3614     /* Use a sequence of UADDLPs to accumulate the counts.  Each step doubles
3615        the element size and halves the number of elements.  */
3616     while (mode != <MODE>mode)
3617       {
3618         auto icode = code_for_aarch64_addlp (ZERO_EXTEND, GET_MODE (tmp));
3619         mode = insn_data[icode].operand[0].mode;
3620         rtx dest = mode == <MODE>mode ? operands[0] : gen_reg_rtx (mode);
3621         emit_insn (GEN_FCN (icode) (dest, tmp));
3622         tmp = dest;
3623       }
3624     DONE;
3625   }
3628 ;; 'across lanes' max and min ops.
3630 ;; Template for outputting a scalar, so we can create __builtins which can be
3631 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
;; Reduce into a scratch of the full vector mode, then extract lane 0
;; (endian-adjusted) as the scalar result.
3632 (define_expand "reduc_<optab>_scal_<mode>"
3633   [(match_operand:<VEL> 0 "register_operand")
3634    (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3635                  FMAXMINV)]
3636   "TARGET_SIMD"
3637   {
3638     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3639     rtx scratch = gen_reg_rtx (<MODE>mode);
3640     emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3641                                                          operands[1]));
3642     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3643     DONE;
3644   }
;; IEEE fmax/fmin reductions delegate to the corresponding FMAXNMV/FMINNMV
;; expander above via the <optab> mapping.
3647 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3648   [(match_operand:<VEL> 0 "register_operand")
3649    (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3650                  FMAXMINNMV)]
3651   "TARGET_SIMD"
3652   {
3653     emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3654     DONE;
3655   }
3658 ;; Likewise for integer cases, signed and unsigned.
3659 (define_expand "reduc_<optab>_scal_<mode>"
3660   [(match_operand:<VEL> 0 "register_operand")
3661    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3662                     MAXMINV)]
3663   "TARGET_SIMD"
3664   {
3665     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3666     rtx scratch = gen_reg_rtx (<MODE>mode);
3667     emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3668                                                          operands[1]));
3669     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3670     DONE;
3671   }
;; Integer [SU]{MAX,MIN}V across-lanes instruction.
3674 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3675  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3676        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3677                     MAXMINV))]
3678  "TARGET_SIMD"
3679  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3680   [(set_attr "type" "neon_reduc_minmax<q>")]
;; V2SI has no across-lanes form; one pairwise op of the vector with itself
;; leaves the result in every lane.
3683 (define_insn "aarch64_reduc_<optab>_internalv2si"
3684  [(set (match_operand:V2SI 0 "register_operand" "=w")
3685        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3686                     MAXMINV))]
3687  "TARGET_SIMD"
3688  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3689   [(set_attr "type" "neon_reduc_minmax")]
;; FP across-lanes max/min (FMAXV/FMINV/FMAXNMV/FMINNMV via <maxmin_uns_op>).
3692 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3693  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3694        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3695                       FMAXMINV))]
3696  "TARGET_SIMD"
3697  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3698   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3701 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3702 ;; allocation.
3703 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3704 ;; to select.
3706 ;; Thus our BSL is of the form:
3707 ;;   op0 = bsl (mask, op2, op3)
3708 ;; We can use any of:
3710 ;;   if (op0 = mask)
3711 ;;     bsl mask, op1, op2
3712 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
3713 ;;     bit op0, op2, mask
3714 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
3715 ;;     bif op0, op1, mask
3717 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3718 ;; Some forms of straight-line code may generate the equivalent form
3719 ;; in *aarch64_simd_bsl<mode>_alt.
;; Canonical xor/and/xor form of bit-select:
;;   op3 ^ ((op3 ^ op2) & op1)  ==  (op1 & op2) | (~op1 & op3).
;; The alternative chosen depends on which input the RA ties to op0.
3721 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3722   [(set (match_operand:VDQ_I 0 "register_operand")
3723         (xor:VDQ_I
3724            (and:VDQ_I
3725              (xor:VDQ_I
3726                (match_operand:<V_INT_EQUIV> 3 "register_operand")
3727                (match_operand:VDQ_I 2 "register_operand"))
3728              (match_operand:VDQ_I 1 "register_operand"))
3729           (match_dup:<V_INT_EQUIV> 3)
3730         ))]
3731   "TARGET_SIMD"
3732   {@ [ cons: =0 , 1 , 2 , 3  ]
3733      [ w        , 0 , w , w  ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3734      [ w        , w , w , 0  ] bit\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3735      [ w        , w , 0 , w  ] bif\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3736   }
3737   [(set_attr "type" "neon_bsl<q>")]
3740 ;; We need this form in addition to the above pattern to match the case
3741 ;; when combine tries merging three insns such that the second operand of
3742 ;; the outer XOR matches the second operand of the inner XOR rather than
3743 ;; the first.  The two are equivalent but since recog doesn't try all
3744 ;; permutations of commutative operations, we have to have a separate pattern.
;; Here the replicated operand is 2, so ops 2 and 3 swap roles in the
;; emitted bsl/bit/bif relative to the pattern above.
3746 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3747   [(set (match_operand:VDQ_I 0 "register_operand")
3748         (xor:VDQ_I
3749            (and:VDQ_I
3750              (xor:VDQ_I
3751                (match_operand:VDQ_I 3 "register_operand")
3752                (match_operand:<V_INT_EQUIV> 2 "register_operand"))
3753               (match_operand:VDQ_I 1 "register_operand"))
3754           (match_dup:<V_INT_EQUIV> 2)))]
3755   "TARGET_SIMD"
3756   {@ [ cons: =0 , 1 , 2 , 3  ]
3757      [ w        , 0 , w , w  ] bsl\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3758      [ w        , w , 0 , w  ] bit\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3759      [ w        , w , w , 0  ] bif\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3760   }
3761   [(set_attr "type" "neon_bsl<q>")]
3764 ;; DImode is special, we want to avoid computing operations which are
3765 ;; more naturally computed in general purpose registers in the vector
3766 ;; registers.  If we do that, we need to move all three operands from general
3767 ;; purpose registers to vector registers, then back again.  However, we
3768 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3769 ;; optimizations based on the component operations of a BSL.
3771 ;; That means we need a splitter back to the individual operations, if they
3772 ;; would be better calculated on the integer side.
;; DImode bit-select: op3 ^ ((op3 ^ op2) & op1).  SIMD alternatives emit a
;; single bsl/bit/bif; the GP alternative (&r) splits into XOR/AND/XOR
;; below.  The split's replacement PARALLEL is unused since the C body
;; emits its own insns and ends in DONE.
3774 (define_insn_and_split "aarch64_simd_bsldi_internal"
3775   [(set (match_operand:DI 0 "register_operand")
3776         (xor:DI
3777            (and:DI
3778              (xor:DI
3779                (match_operand:DI 3 "register_operand")
3780                (match_operand:DI 2 "register_operand"))
3781              (match_operand:DI 1 "register_operand"))
3782           (match_dup:DI 3)
3783         ))]
3784   "TARGET_SIMD"
3785   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3786      [ w        , 0 , w , w ; neon_bsl    , 4      ] bsl\t%0.8b, %2.8b, %3.8b
3787      [ w        , w , w , 0 ; neon_bsl    , 4      ] bit\t%0.8b, %2.8b, %1.8b
3788      [ w        , w , 0 , w ; neon_bsl    , 4      ] bif\t%0.8b, %3.8b, %1.8b
3789      [ &r       , r , r , r ; multiple    , 12     ] #
3790   }
3791   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3792   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3794   /* Split back to individual operations.  If we're before reload, and
3795      able to create a temporary register, do so.  If we're after reload,
3796      we've got an early-clobber destination register, so use that.
3797      Otherwise, we can't create pseudos and we can't yet guarantee that
3798      operands[0] is safe to write, so FAIL to split.  */
3800   rtx scratch;
3801   if (reload_completed)
3802     scratch = operands[0];
3803   else if (can_create_pseudo_p ())
3804     scratch = gen_reg_rtx (DImode);
3805   else
3806     FAIL;
3808   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3809   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3810   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3811   DONE;
;; Commuted form (outer xor replicates operand 2 instead of 3), mirroring
;; *aarch64_simd_bsl<mode>_alt; the final GP xor therefore uses operand 2.
3815 (define_insn_and_split "aarch64_simd_bsldi_alt"
3816   [(set (match_operand:DI 0 "register_operand")
3817         (xor:DI
3818            (and:DI
3819              (xor:DI
3820                (match_operand:DI 3 "register_operand")
3821                (match_operand:DI 2 "register_operand"))
3822              (match_operand:DI 1 "register_operand"))
3823           (match_dup:DI 2)
3824         ))]
3825   "TARGET_SIMD"
3826   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3827      [ w        , 0 , w , w ; neon_bsl    , 4      ] bsl\t%0.8b, %3.8b, %2.8b
3828      [ w        , w , 0 , w ; neon_bsl    , 4      ] bit\t%0.8b, %3.8b, %1.8b
3829      [ w        , w , w , 0 ; neon_bsl    , 4      ] bif\t%0.8b, %2.8b, %1.8b
3830      [ &r       , r , r , r ; multiple    , 12     ] #
3831   }
3832   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3833   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3835   /* Split back to individual operations.  If we're before reload, and
3836      able to create a temporary register, do so.  If we're after reload,
3837      we've got an early-clobber destination register, so use that.
3838      Otherwise, we can't create pseudos and we can't yet guarantee that
3839      operands[0] is safe to write, so FAIL to split.  */
3841   rtx scratch;
3842   if (reload_completed)
3843     scratch = operands[0];
3844   else if (can_create_pseudo_p ())
3845     scratch = gen_reg_rtx (DImode);
3846   else
3847     FAIL;
3849   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3850   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3851   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3852   DONE;
;; Public BSL expander.  Float modes are punned to the equivalent integer
;; mode (the xor/and/xor pattern is integer-only); a fresh integer-mode
;; temporary stands in for op0 because operands of different modes cannot
;; be tied, and the result is punned back at the end.
3856 (define_expand "@aarch64_simd_bsl<mode>"
3857   [(match_operand:VALLDIF 0 "register_operand")
3858    (match_operand:<V_INT_EQUIV> 1 "register_operand")
3859    (match_operand:VALLDIF 2 "register_operand")
3860    (match_operand:VALLDIF 3 "register_operand")]
3861  "TARGET_SIMD"
3863   /* We can't alias operands together if they have different modes.  */
3864   rtx tmp = operands[0];
3865   if (FLOAT_MODE_P (<MODE>mode))
3866     {
3867       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3868       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3869       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3870     }
3871   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3872   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3873                                                          operands[1],
3874                                                          operands[2],
3875                                                          operands[3]));
3876   if (tmp != operands[0])
3877     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3879   DONE;
;; vcond_mask: elementwise operand 0 = operand 3 ? operand 1 : operand 2,
;; where operand 3 is an all-ones/all-zeros per-lane mask.  The mask-copy
;; and mask-invert special cases avoid emitting a BSL entirely.
3882 (define_expand "vcond_mask_<mode><v_int_equiv>"
3883   [(match_operand:VALLDI 0 "register_operand")
3884    (match_operand:VALLDI 1 "nonmemory_operand")
3885    (match_operand:VALLDI 2 "nonmemory_operand")
3886    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3887   "TARGET_SIMD"
3889   /* If we have (a = (P) ? -1 : 0);
3890      Then we can simply move the generated mask (result must be int).  */
3891   if (operands[1] == CONSTM1_RTX (<MODE>mode)
3892       && operands[2] == CONST0_RTX (<MODE>mode))
3893     emit_move_insn (operands[0], operands[3]);
3894   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
3895   else if (operands[1] == CONST0_RTX (<MODE>mode)
3896            && operands[2] == CONSTM1_RTX (<MODE>mode))
3897     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3898   else
3899     {
3900       if (!REG_P (operands[1]))
3901         operands[1] = force_reg (<MODE>mode, operands[1]);
3902       if (!REG_P (operands[2]))
3903         operands[2] = force_reg (<MODE>mode, operands[2]);
3904       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3905                                              operands[1], operands[2]));
3906     }
3908   DONE;
3911 ;; Patterns that compare two vectors and conditionally branch.
;; Branch on whole-vector (in)equality: XOR folds a compare against a
;; non-zero vector into a compare against zero, the vector is collapsed to
;; a DImode scalar, and a normal conditional branch tests that scalar.
3913 (define_expand "cbranch<mode>4"
3914   [(set (pc)
3915         (if_then_else
3916           (match_operator 0 "aarch64_equality_operator"
3917             [(match_operand:VDQ_I 1 "register_operand")
3918              (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")])
3919           (label_ref (match_operand 3 ""))
3920           (pc)))]
3921   "TARGET_SIMD"
3923   auto code = GET_CODE (operands[0]);
3924   rtx tmp = operands[1];
3926   /* If comparing against a non-zero vector we have to do a comparison first
3927      so we can have a != 0 comparison with the result.  */
3928   if (operands[2] != CONST0_RTX (<MODE>mode))
3929     {
3930       tmp = gen_reg_rtx (<MODE>mode);
3931       emit_insn (gen_xor<mode>3 (tmp, operands[1], operands[2]));
3932     }
3934   /* 64-bit vectors need no reduction: they already fit in one DImode
3935      register.  128-bit vectors are first narrowed with a pairwise UMAX.  */
3935   if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
3936     {
3937       /* Always reduce using a V4SI.  */
3938       rtx reduc = gen_lowpart (V4SImode, tmp);
3939       rtx res = gen_reg_rtx (V4SImode);
3940       emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
3941       emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
3942     }
3944   rtx val = gen_reg_rtx (DImode);
3945   emit_move_insn (val, gen_lowpart (DImode, tmp));
3947   rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
3948   rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
3949   emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
3950   DONE;
3953 ;; Patterns comparing two vectors to produce a mask.
;; Integer vector compare producing an all-ones/all-zeros per-lane mask.
;; Signed compares map directly to CMLT/CMGE/CMLE/CMGT/CMEQ; the unsigned
;; LTU/LEU forms are realised by swapping the operands of CMHI/CMHS
;; (cmgtu/cmgeu); NE is emitted as the complement of EQ.  Compares against
;; the zero vector are kept as immediates so the zero-form instructions
;; can be used.
3955 (define_expand "vec_cmp<mode><mode>"
3956   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3957           (match_operator 1 "comparison_operator"
3958             [(match_operand:VSDQ_I_DI 2 "register_operand")
3959              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3960   "TARGET_SIMD"
3962   rtx mask = operands[0];
3963   enum rtx_code code = GET_CODE (operands[1]);
3965   switch (code)
3966     {
3967     case NE:
3968     case LE:
3969     case LT:
3970     case GE:
3971     case GT:
3972     case EQ:
3973       if (operands[3] == CONST0_RTX (<MODE>mode))
3974         break;
3976       /* Fall through.  */
3977     default:
3978       if (!REG_P (operands[3]))
3979         operands[3] = force_reg (<MODE>mode, operands[3]);
3981       break;
3982     }
3984   switch (code)
3985     {
3986     case LT:
3987       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3988       break;
3990     case GE:
3991       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3992       break;
3994     case LE:
3995       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3996       break;
3998     case GT:
3999       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
4000       break;
4002     case LTU:
4003       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
4004       break;
4006     case GEU:
4007       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
4008       break;
4010     case LEU:
4011       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
4012       break;
4014     case GTU:
4015       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
4016       break;
4018     case NE:
4019       /* Handle NE as !EQ.  */
4020       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
4021       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
4022       break;
4024     case EQ:
4025       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
4026       break;
4028     default:
4029       gcc_unreachable ();
4030     }
4032   DONE;
;; Float vector compare producing an integer mask.  The quiet (UN*)
;; predicates must not raise FP exceptions, so NaN lanes are zeroed before
;; the signalling compare and the unordered result is OR'd back in.
4035 (define_expand "vec_cmp<mode><v_int_equiv>"
4036   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
4037         (match_operator 1 "comparison_operator"
4038             [(match_operand:VDQF 2 "register_operand")
4039              (match_operand:VDQF 3 "nonmemory_operand")]))]
4040   "TARGET_SIMD"
4042   int use_zero_form = 0;
4043   enum rtx_code code = GET_CODE (operands[1]);
4044   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
4046   rtx (*comparison) (rtx, rtx, rtx) = NULL;
4048   switch (code)
4049     {
4050     case LE:
4051     case LT:
4052     case GE:
4053     case GT:
4054     case EQ:
4055       if (operands[3] == CONST0_RTX (<MODE>mode))
4056         {
4057           use_zero_form = 1;
4058           break;
4059         }
4060       /* Fall through.  */
4061     default:
4062       if (!REG_P (operands[3]))
4063         operands[3] = force_reg (<MODE>mode, operands[3]);
4065       break;
4066     }
4068   switch (code)
4069     {
4070     case LT:
4071       if (use_zero_form)
4072         {
4073           comparison = gen_aarch64_cmlt<mode>;
4074           break;
4075         }
4076       /* Fall through.  */
4077     case UNLT:
4078       std::swap (operands[2], operands[3]);
4079       /* Fall through.  */
4080     case UNGT:
4081     case GT:
4082       comparison = gen_aarch64_cmgt<mode>;
4083       break;
4084     case LE:
4085       if (use_zero_form)
4086         {
4087           comparison = gen_aarch64_cmle<mode>;
4088           break;
4089         }
4090       /* Fall through.  */
4091     case UNLE:
4092       std::swap (operands[2], operands[3]);
4093       /* Fall through.  */
4094     case UNGE:
4095     case GE:
4096       comparison = gen_aarch64_cmge<mode>;
4097       break;
4098     case NE:
4099     case EQ:
4100       comparison = gen_aarch64_cmeq<mode>;
4101       break;
4102     case UNEQ:
4103     case ORDERED:
4104     case UNORDERED:
4105     case LTGT:
4106       break;
4107     default:
4108       gcc_unreachable ();
4109     }
4111   switch (code)
4112     {
4113     case UNGE:
4114     case UNGT:
4115     case UNLE:
4116     case UNLT:
4117       {
4118         /* All of the above must not raise any FP exceptions.  Thus we first
4119            check each operand for NaNs and force any elements containing NaN to
4120            zero before using them in the compare.
4121            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4122                                      (cm<cc> (isnan (a) ? 0.0 : a,
4123                                               isnan (b) ? 0.0 : b))
4124            We use the following transformations for doing the comparisons:
4125            a UNGE b -> a GE b
4126            a UNGT b -> a GT b
4127            a UNLE b -> b GE a
4128            a UNLT b -> b GT a.  */
4130         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4131         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4132         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4133         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4134         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4135         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4136         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4137                                           lowpart_subreg (<V_INT_EQUIV>mode,
4138                                                           operands[2],
4139                                                           <MODE>mode)));
4140         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4141                                           lowpart_subreg (<V_INT_EQUIV>mode,
4142                                                           operands[3],
4143                                                           <MODE>mode)));
4144         gcc_assert (comparison != NULL);
4145         emit_insn (comparison (operands[0],
4146                                lowpart_subreg (<MODE>mode,
4147                                                tmp0, <V_INT_EQUIV>mode),
4148                                lowpart_subreg (<MODE>mode,
4149                                                tmp1, <V_INT_EQUIV>mode)));
4150         emit_insn (gen_iorn<v_int_equiv>3 (operands[0], operands[0], tmp2));
4151       }
4152       break;
4154     case LT:
4155     case LE:
4156     case GT:
4157     case GE:
4158     case EQ:
4159     case NE:
4160       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
4161          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
4162          a GE b -> a GE b
4163          a GT b -> a GT b
4164          a LE b -> b GE a
4165          a LT b -> b GT a
4166          a EQ b -> a EQ b
4167          a NE b -> ~(a EQ b)  */
4168       gcc_assert (comparison != NULL);
4169       emit_insn (comparison (operands[0], operands[2], operands[3]));
4170       if (code == NE)
4171         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4172       break;
4174     case LTGT:
4175       /* LTGT is not guaranteed not to generate an FP exception.  So let's
4176          go the faster way: ((a > b) || (b > a)).  */
4177       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4178                                          operands[2], operands[3]));
4179       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4180       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4181       break;
4183     case ORDERED:
4184     case UNORDERED:
4185     case UNEQ:
4186       /* cmeq (a, a) & cmeq (b, b).  */
4187       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4188                                          operands[2], operands[2]));
4189       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4190       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4192       if (code == UNORDERED)
4193         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4194       else if (code == UNEQ)
4195         {
4196           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4197           emit_insn (gen_iorn<v_int_equiv>3 (operands[0], tmp, operands[0]));
4198         }
4199       break;
4201     default:
4202       gcc_unreachable ();
4203     }
4205   DONE;
;; Unsigned integer vector compare.  The comparison code in operand 1
;; already carries the signedness (LTU/GTU/...), so this simply defers to
;; the signed/unsigned-aware vec_cmp expander above.
4208 (define_expand "vec_cmpu<mode><mode>"
4209   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4210           (match_operator 1 "comparison_operator"
4211             [(match_operand:VSDQ_I_DI 2 "register_operand")
4212              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4213   "TARGET_SIMD"
4215   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4216                                       operands[2], operands[3]));
4217   DONE;
4220 ;; Patterns for AArch64 SIMD Intrinsics.
4222 ;; Lane extraction with sign extension to general purpose register.
;; SMOV: extract one B/H lane and sign-extend it into a W or X register.
;; The lane index is remapped at output time for big-endian targets.
4223 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4224   [(set (match_operand:GPI 0 "register_operand" "=r")
4225         (sign_extend:GPI
4226           (vec_select:<VDQQH:VEL>
4227             (match_operand:VDQQH 1 "register_operand" "w")
4228             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4229   "TARGET_SIMD"
4230   {
4231     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4232                                            INTVAL (operands[2]));
4233     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4234   }
4235   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
;; UMOV: extract one B/H lane and zero-extend it.  Always writes the W
;; form of the destination; for a DImode result the upper 32 bits are
;; implicitly zeroed by the 32-bit register write.
4238 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4239   [(set (match_operand:GPI 0 "register_operand" "=r")
4240         (zero_extend:GPI
4241           (vec_select:<VDQQH:VEL>
4242             (match_operand:VDQQH 1 "register_operand" "w")
4243             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4244   "TARGET_SIMD"
4245   {
4246     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4247                                            INTVAL (operands[2]));
4248     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4249   }
4250   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4253 ;; Lane extraction of a value, neither sign nor zero extension
4254 ;; is guaranteed so upper bits should be considered undefined.
4255 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4256 ;; Extracting lane zero is split into a simple move when it is between SIMD
4257 ;; registers or a store.
;; Lane extraction into a GP register (UMOV), another SIMD register (DUP)
;; or memory (ST1 of one lane).  After reload, extracting lane 0 between
;; registers or to memory degenerates into a plain scalar move/store.
4258 (define_insn_and_split "@aarch64_get_lane<mode>"
4259   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4260         (vec_select:<VEL>
4261           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4262           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4263   "TARGET_SIMD"
4264   {
4265     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4266     switch (which_alternative)
4267       {
4268         case 0:
4269           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4270         case 1:
4271           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4272         case 2:
4273           return "st1\\t{%1.<Vetype>}[%2], %0";
4274         default:
4275           gcc_unreachable ();
4276       }
4277   }
4278  "&& reload_completed
4279   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4280  [(set (match_dup 0) (match_dup 1))]
4282    operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4284   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; Move the high 64-bit element of a two-element 128-bit vector straight
;; into a GP register with FMOV (lane 1 in memory order).
4287 (define_insn "*aarch64_get_high<mode>"
4288   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
4289         (vec_select:<VEL>
4290           (match_operand:VQ_2E 1 "register_operand" "w")
4291           (parallel [(match_operand:SI 2 "immediate_operand")])))]
4292   "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
4293   "fmov\t%0, %1.d[1]"
4294   [(set_attr "type" "f_mrc")]
;; Fuse two loads of adjacent memory into a single double-width LDR when
;; aarch64_mergeable_load_pair_p confirms the addresses are consecutive.
4297 (define_insn "load_pair_lanes<mode>"
4298   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4299         (vec_concat:<VDBL>
4300            (match_operand:VDCSIF 1 "memory_operand" "Utq")
4301            (match_operand:VDCSIF 2 "memory_operand" "m")))]
4302   "TARGET_FLOAT
4303    && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
4304   "ldr\\t%<single_dtype>0, %1"
4305   [(set_attr "type" "neon_load1_1reg<dblq>")]
4308 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
4309 ;; below.  The reason for having both of them is that the alternatives of
4310 ;; the later patterns do not have consistent register preferences: the STP
4311 ;; alternatives have no preference between GPRs and FPRs (and if anything,
4312 ;; the GPR form is more natural for scalar integers) whereas the other
4313 ;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4315 ;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4316 ;; which the destination was always memory.  On the other hand, expressing
4317 ;; the true preferences makes GPRs seem more palatable than they really are
4318 ;; for register destinations.
4320 ;; Despite that, we do still want the general form to have STP alternatives,
4321 ;; in order to handle cases where a register destination is spilled.
4323 ;; The best compromise therefore seemed to be to have a dedicated STP
4324 ;; pattern to catch cases in which the destination was always memory.
4325 ;; This dedicated pattern must come first.
;; Dedicated memory-destination vec_concat: STP of the two halves, from
;; either FP or GP registers.  Must precede the general patterns below
;; (see the comment above) so the RA sees no register-class bias here.
4327 (define_insn "store_pair_lanes<mode>"
4328   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand")
4329         (vec_concat:<VDBL>
4330            (match_operand:VDCSIF 1 "register_operand")
4331            (match_operand:VDCSIF 2 "register_operand")))]
4332   "TARGET_FLOAT"
4333   {@ [ cons: =0 , 1 , 2 ; attrs: type ]
4334      [ Umn      , w , w ; neon_stp    ] stp\t%<single_type>1, %<single_type>2, %y0
4335      [ Umn      , r , r ; store_16    ] stp\t%<single_wx>1, %<single_wx>2, %y0
4336   }
4339 ;; Form a vector whose least significant half comes from operand 1 and whose
4340 ;; most significant half comes from operand 2.  The register alternatives
4341 ;; tie the least significant half to the same register as the destination,
4342 ;; so that only the other half needs to be handled explicitly.  For the
4343 ;; reasons given above, the STP alternatives use ? for constraints that
4344 ;; the register alternatives either don't accept or themselves disparage.
;; Little-endian vec_concat: operand 1 is the low half, operand 2 the
;; high half.  Register alternatives tie the low half to the destination
;; so only the high half (INS/FMOV/LD1) needs handling; the STP
;; alternatives exist for spilled destinations (see comment above).
4346 (define_insn "*aarch64_combine_internal<mode>"
4347   [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4348         (vec_concat:<VDBL>
4349           (match_operand:VDCSIF 1 "register_operand")
4350           (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")))]
4351   "TARGET_FLOAT
4352    && !BYTES_BIG_ENDIAN
4353    && (register_operand (operands[0], <VDBL>mode)
4354        || register_operand (operands[2], <MODE>mode))"
4355   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
4356      [ w        , w  , w   ; neon_permute<dblq>        , simd  ] uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
4357      [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
4358      [ w        , 0  , ?r  ; f_mcr                     , *     ] fmov\t%0.d[1], %2
4359      [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] ld1\t{%0.<single_type>}[1], %2
4360      [ Umn      , ?w , w   ; neon_stp                  , *     ] stp\t%<single_type>1, %<single_type>2, %y0
4361      [ Umn      , ?r , ?r  ; store_16                  , *     ] stp\t%<single_wx>1, %<single_wx>2, %y0
4362   }
;; Big-endian twin of *aarch64_combine_internal: the vec_concat operand
;; order is reversed (operand 2 first), but operand 1 is still the half
;; tied to the destination register.
4365 (define_insn "*aarch64_combine_internal_be<mode>"
4366   [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4367         (vec_concat:<VDBL>
4368           (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")
4369           (match_operand:VDCSIF 1 "register_operand")))]
4370   "TARGET_FLOAT
4371    && BYTES_BIG_ENDIAN
4372    && (register_operand (operands[0], <VDBL>mode)
4373        || register_operand (operands[2], <MODE>mode))"
4374   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
4375      [ w        , w  , w   ; neon_permute<dblq>        , simd  ] uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
4376      [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
4377      [ w        , 0  , ?r  ; f_mcr                     , *     ] fmov\t%0.d[1], %2
4378      [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] ld1\t{%0.<single_type>}[1], %2
4379      [ Umn      , ?w , ?w  ; neon_stp                  , *     ] stp\t%<single_type>2, %<single_type>1, %y0
4380      [ Umn      , ?r , ?r  ; store_16                  , *     ] stp\t%<single_wx>2, %<single_wx>1, %y0
4381   }
4384 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4385 ;; dest vector.
;; Little-endian concat with a zero high half.  A scalar FMOV or LDR of
;; the low half suffices because those writes zero the upper part of the
;; vector register.
4387 (define_insn "*aarch64_combinez<mode>"
4388   [(set (match_operand:<VDBL> 0 "register_operand")
4389         (vec_concat:<VDBL>
4390           (match_operand:VDCSIF 1 "nonimmediate_operand")
4391           (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
4392   "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4393   {@ [ cons: =0 , 1  ; attrs: type      ]
4394      [ w        , w  ; neon_move<q>     ] fmov\t%<single_type>0, %<single_type>1
4395      [ w        , ?r ; neon_from_gp     ] fmov\t%<single_type>0, %<single_wx>1
4396      [ w        , m  ; neon_load1_1reg  ] ldr\t%<single_type>0, %1
4397   }
;; Big-endian twin of *aarch64_combinez: the zero half comes first in the
;; vec_concat, but the generated code is identical.
4400 (define_insn "*aarch64_combinez_be<mode>"
4401   [(set (match_operand:<VDBL> 0 "register_operand")
4402         (vec_concat:<VDBL>
4403           (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4404           (match_operand:VDCSIF 1 "nonimmediate_operand")))]
4405   "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4406   {@ [ cons: =0 , 1  ; attrs: type      ]
4407      [ w        , w  ; neon_move<q>     ] fmov\t%<single_type>0, %<single_type>1
4408      [ w        , ?r ; neon_from_gp     ] fmov\t%<single_type>0, %<single_wx>1
4409      [ w        , m  ; neon_load1_1reg  ] ldr\t%<single_type>0, %1
4410   }
4413 ;; Form a vector whose first half (in array order) comes from operand 1
4414 ;; and whose second half (in array order) comes from operand 2.
4415 ;; This operand order follows the RTL vec_concat operation.
;; Legitimize the operands of a vec_concat so that exactly one of the
;; insn patterns above can match: mergeable load pairs stay in memory,
;; a zero high half keeps a nonimmediate low half (*aarch64_combinez),
;; and everything else is massaged for *aarch64_combine_internal.
4416 (define_expand "@aarch64_vec_concat<mode>"
4417   [(set (match_operand:<VDBL> 0 "register_operand")
4418         (vec_concat:<VDBL>
4419           (match_operand:VDCSIF 1 "general_operand")
4420           (match_operand:VDCSIF 2 "general_operand")))]
4421   "TARGET_FLOAT"
4423   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4424   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4426   if (MEM_P (operands[1])
4427       && MEM_P (operands[2])
4428       && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4429     /* Use load_pair_lanes<mode>.  */
4430     ;
4431   else if (operands[hi] == CONST0_RTX (<MODE>mode))
4432     {
4433       /* Use *aarch64_combinez<mode>.  */
4434       if (!nonimmediate_operand (operands[lo], <MODE>mode))
4435         operands[lo] = force_reg (<MODE>mode, operands[lo]);
4436     }
4437   else
4438     {
4439       /* Use *aarch64_combine_internal<mode>.  */
4440       operands[lo] = force_reg (<MODE>mode, operands[lo]);
4441       if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4442         {
4443           if (MEM_P (operands[hi]))
4444             {
4445               rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4446               operands[hi] = replace_equiv_address (operands[hi], addr);
4447             }
4448           else
4449             operands[hi] = force_reg (<MODE>mode, operands[hi]);
4450         }
4451     }
4454 ;; Form a vector whose least significant half comes from operand 1 and whose
4455 ;; most significant half comes from operand 2.  This operand order follows
4456 ;; arm_neon.h vcombine* intrinsics.
;; vcombine-style entry point: operand 1 is the least significant half.
;; vec_concat uses memory (array) order, so the halves are swapped for
;; big-endian before deferring to @aarch64_vec_concat.
4457 (define_expand "@aarch64_combine<mode>"
4458   [(match_operand:<VDBL> 0 "register_operand")
4459    (match_operand:VDC 1 "general_operand")
4460    (match_operand:VDC 2 "general_operand")]
4461   "TARGET_FLOAT"
4463   if (BYTES_BIG_ENDIAN)
4464     std::swap (operands[1], operands[2]);
4465   emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
4466                                            operands[2]));
4467   DONE;
4471 ;; <su><addsub>l<q>.
;; Widening add/sub of the high halves of two Q registers:
;; {S,U}{ADD,SUB}L2.  Operand 3 selects the high-half lanes.
4473 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4474  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4475        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4476                            (match_operand:VQW 1 "register_operand" "w")
4477                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4478                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4479                            (match_operand:VQW 2 "register_operand" "w")
4480                            (match_dup 3)))))]
4481   "TARGET_SIMD"
4482   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4483   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/sub of the low halves of two Q registers:
;; {S,U}{ADD,SUB}L.  Operand 3 selects the low-half lanes.
4486 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4487  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4488        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4489                            (match_operand:VQW 1 "register_operand" "w")
4490                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4491                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4492                            (match_operand:VQW 2 "register_operand" "w")
4493                            (match_dup 3)))))]
4494   "TARGET_SIMD"
4495   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4496   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Standard-name wrapper: widening add of the low halves ({S,U}ADDL).
4499 (define_expand "vec_widen_<su>add_lo_<mode>"
4500   [(match_operand:<VWIDE> 0 "register_operand")
4501    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4502    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4503   "TARGET_SIMD"
4505   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4506   emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4507                                                      operands[2], p));
4508   DONE;
;; Standard-name wrapper: widening add of the high halves ({S,U}ADDL2).
4511 (define_expand "vec_widen_<su>add_hi_<mode>"
4512   [(match_operand:<VWIDE> 0 "register_operand")
4513    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4514    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4515   "TARGET_SIMD"
4517   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4518   emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4519                                                      operands[2], p));
4520   DONE;
;; Standard-name wrapper: widening subtract of the low halves ({S,U}SUBL).
4523 (define_expand "vec_widen_<su>sub_lo_<mode>"
4524   [(match_operand:<VWIDE> 0 "register_operand")
4525    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4526    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4527   "TARGET_SIMD"
4529   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4530   emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4531                                                      operands[2], p));
4532   DONE;
;; Standard-name wrapper: widening subtract of the high halves ({S,U}SUBL2).
4535 (define_expand "vec_widen_<su>sub_hi_<mode>"
4536   [(match_operand:<VWIDE> 0 "register_operand")
4537    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4538    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4539   "TARGET_SIMD"
4541   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4542   emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4543                                                      operands[2], p));
4544   DONE;
;; Intrinsic expander for SADDL2 (signed widening add, high halves).
4547 (define_expand "aarch64_saddl2<mode>"
4548   [(match_operand:<VWIDE> 0 "register_operand")
4549    (match_operand:VQW 1 "register_operand")
4550    (match_operand:VQW 2 "register_operand")]
4551   "TARGET_SIMD"
4553   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4554   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4555                                                   operands[2], p));
4556   DONE;
;; Intrinsic expander for UADDL2 (unsigned widening add, high halves).
4559 (define_expand "aarch64_uaddl2<mode>"
4560   [(match_operand:<VWIDE> 0 "register_operand")
4561    (match_operand:VQW 1 "register_operand")
4562    (match_operand:VQW 2 "register_operand")]
4563   "TARGET_SIMD"
4565   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4566   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4567                                                   operands[2], p));
4568   DONE;
;; Intrinsic expander for SSUBL2 (signed widening subtract, high halves).
4571 (define_expand "aarch64_ssubl2<mode>"
4572   [(match_operand:<VWIDE> 0 "register_operand")
4573    (match_operand:VQW 1 "register_operand")
4574    (match_operand:VQW 2 "register_operand")]
4575   "TARGET_SIMD"
4577   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4578   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4579                                                 operands[2], p));
4580   DONE;
;; Intrinsic expander for USUBL2 (unsigned widening subtract, high halves).
4583 (define_expand "aarch64_usubl2<mode>"
4584   [(match_operand:<VWIDE> 0 "register_operand")
4585    (match_operand:VQW 1 "register_operand")
4586    (match_operand:VQW 2 "register_operand")]
4587   "TARGET_SIMD"
4589   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4590   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4591                                                 operands[2], p));
4592   DONE;
;; Widening add/sub of two full 64-bit vectors: {S,U}{ADD,SUB}L on
;; D-register operands producing a Q-register result.
4595 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4596  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4597        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4598                            (match_operand:VD_BHSI 1 "register_operand" "w"))
4599                        (ANY_EXTEND:<VWIDE>
4600                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4601   "TARGET_SIMD"
4602   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4603   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4606 ;; <su><addsub>w<q>.
;; widen_ssum for 128-bit inputs: accumulate the sign-extended vector into
;; operand 2 via SADDW on the low half followed by SADDW2 on the high half.
4608 (define_expand "widen_ssum<mode>3"
4609   [(set (match_operand:<VDBLW> 0 "register_operand")
4610         (plus:<VDBLW> (sign_extend:<VDBLW>
4611                         (match_operand:VQW 1 "register_operand"))
4612                       (match_operand:<VDBLW> 2 "register_operand")))]
4613   "TARGET_SIMD"
4614   {
4615     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4616     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4618     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4619                                                 operands[1], p));
4620     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4621     DONE;
4622   }
;; widen_ssum for 64-bit inputs: a single SADDW covers the whole vector.
4625 (define_expand "widen_ssum<mode>3"
4626   [(set (match_operand:<VWIDE> 0 "register_operand")
4627         (plus:<VWIDE> (sign_extend:<VWIDE>
4628                         (match_operand:VD_BHSI 1 "register_operand"))
4629                       (match_operand:<VWIDE> 2 "register_operand")))]
4630   "TARGET_SIMD"
4632   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4633   DONE;
;; widen_usum for 128-bit inputs: accumulate the zero-extended vector into
;; operand 2 via UADDW on the low half followed by UADDW2 on the high half.
4636 (define_expand "widen_usum<mode>3"
4637   [(set (match_operand:<VDBLW> 0 "register_operand")
4638         (plus:<VDBLW> (zero_extend:<VDBLW>
4639                         (match_operand:VQW 1 "register_operand"))
4640                       (match_operand:<VDBLW> 2 "register_operand")))]
4641   "TARGET_SIMD"
4642   {
4643     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4644     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4646     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4647                                                  operands[1], p));
4648     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4649     DONE;
4650   }
4653 (define_expand "widen_usum<mode>3"
4654   [(set (match_operand:<VWIDE> 0 "register_operand")
4655         (plus:<VWIDE> (zero_extend:<VWIDE>
4656                         (match_operand:VD_BHSI 1 "register_operand"))
4657                       (match_operand:<VWIDE> 2 "register_operand")))]
4658   "TARGET_SIMD"
4660   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4661   DONE;
4664 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4665   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4666         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4667           (ANY_EXTEND:<VWIDE>
4668             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4669   "TARGET_SIMD"
4670   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4671   [(set_attr "type" "neon_sub_widen")]
4674 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4675   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4676         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4677           (ANY_EXTEND:<VWIDE>
4678             (vec_select:<VHALF>
4679               (match_operand:VQW 2 "register_operand" "w")
4680               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4681   "TARGET_SIMD"
4682   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4683   [(set_attr "type" "neon_sub_widen")]
4686 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4687   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4688         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4689           (ANY_EXTEND:<VWIDE>
4690             (vec_select:<VHALF>
4691               (match_operand:VQW 2 "register_operand" "w")
4692               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4693   "TARGET_SIMD"
4694   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4695   [(set_attr "type" "neon_sub_widen")]
4698 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4699   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4700         (plus:<VWIDE>
4701           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4702           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4703   "TARGET_SIMD"
4704   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4705   [(set_attr "type" "neon_add_widen")]
4708 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4709   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4710         (plus:<VWIDE>
4711           (ANY_EXTEND:<VWIDE>
4712             (vec_select:<VHALF>
4713               (match_operand:VQW 2 "register_operand" "w")
4714               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4715           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4716   "TARGET_SIMD"
4717   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4718   [(set_attr "type" "neon_add_widen")]
4721 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4722   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4723         (plus:<VWIDE>
4724           (ANY_EXTEND:<VWIDE>
4725             (vec_select:<VHALF>
4726               (match_operand:VQW 2 "register_operand" "w")
4727               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4728           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4729   "TARGET_SIMD"
4730   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4731   [(set_attr "type" "neon_add_widen")]
4734 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4735   [(set (match_operand:<VWIDE> 0 "register_operand")
4736         (ADDSUB:<VWIDE>
4737           (ANY_EXTEND:<VWIDE>
4738             (vec_select:<VHALF>
4739               (match_operand:VQW 2 "register_operand")
4740               (match_dup 3)))
4741           (match_operand:<VWIDE> 1 "register_operand")))]
4742   "TARGET_SIMD"
4744   /* We still do an emit_insn rather than relying on the pattern above
4745      because for the MINUS case the operands would need to be swapped
4746      around.  */
4747   operands[3]
4748     = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4749   emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4750                                                        operands[0],
4751                                                        operands[1],
4752                                                        operands[2],
4753                                                        operands[3]));
4754   DONE;
4757 ;; <su><r>h<addsub>.
4759 (define_expand "<su_optab>avg<mode>3_floor"
4760   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4761         (truncate:VDQ_BHSI
4762           (ashiftrt:<V2XWIDE>
4763             (plus:<V2XWIDE>
4764               (ANY_EXTEND:<V2XWIDE>
4765                 (match_operand:VDQ_BHSI 1 "register_operand"))
4766               (ANY_EXTEND:<V2XWIDE>
4767                 (match_operand:VDQ_BHSI 2 "register_operand")))
4768             (match_dup 3))))]
4769   "TARGET_SIMD"
4770   {
4771     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4772   }
4775 (define_expand "<su_optab>avg<mode>3_ceil"
4776   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4777         (truncate:VDQ_BHSI
4778           (ashiftrt:<V2XWIDE>
4779             (plus:<V2XWIDE>
4780               (plus:<V2XWIDE>
4781                 (ANY_EXTEND:<V2XWIDE>
4782                   (match_operand:VDQ_BHSI 1 "register_operand"))
4783                 (ANY_EXTEND:<V2XWIDE>
4784                   (match_operand:VDQ_BHSI 2 "register_operand")))
4785                (match_dup 3))
4786             (match_dup 3))))]
4787   "TARGET_SIMD"
4788   {
4789     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4790   }
4793 (define_expand "aarch64_<su>hsub<mode>"
4794   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4795         (truncate:VDQ_BHSI
4796           (ashiftrt:<V2XWIDE>
4797             (minus:<V2XWIDE>
4798               (ANY_EXTEND:<V2XWIDE>
4799                 (match_operand:VDQ_BHSI 1 "register_operand"))
4800               (ANY_EXTEND:<V2XWIDE>
4801                 (match_operand:VDQ_BHSI 2 "register_operand")))
4802             (match_dup 3))))]
4803   "TARGET_SIMD"
4804   {
4805     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4806   }
4809 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4810   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4811         (truncate:VDQ_BHSI
4812           (ashiftrt:<V2XWIDE>
4813             (ADDSUB:<V2XWIDE>
4814               (ANY_EXTEND:<V2XWIDE>
4815                 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4816               (ANY_EXTEND:<V2XWIDE>
4817                 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4818             (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))))]
4819   "TARGET_SIMD"
4820   "<su>h<ADDSUB:optab>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4821   [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
4824 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4825   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4826         (truncate:VDQ_BHSI
4827           (ashiftrt:<V2XWIDE>
4828             (plus:<V2XWIDE>
4829               (plus:<V2XWIDE>
4830                 (ANY_EXTEND:<V2XWIDE>
4831                   (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4832                 (ANY_EXTEND:<V2XWIDE>
4833                   (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4834                (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))
4835             (match_dup 3))))]
4836   "TARGET_SIMD"
4837   "<su>rhadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4838   [(set_attr "type" "neon_add_halve<q>")]
4841 ;; <r><addsub>hn<q>.
4843 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4844   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4845         (truncate:<VNARROWQ>
4846           (ashiftrt:VQN
4847             (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4848                         (match_operand:VQN 2 "register_operand" "w"))
4849             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4850   "TARGET_SIMD"
4851   "<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4852   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4855 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4856   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4857         (truncate:<VNARROWQ>
4858           (ashiftrt:VQN
4859             (plus:VQN
4860               (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4861                           (match_operand:VQN 2 "register_operand" "w"))
4862               (match_operand:VQN 3 "aarch64_simd_raddsubhn_imm_vec"))
4863             (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4864   "TARGET_SIMD"
4865   "r<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4866   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4869 (define_expand "aarch64_<optab>hn<mode>"
4870   [(set (match_operand:<VNARROWQ> 0 "register_operand")
4871         (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4872                     (match_operand:VQN 2 "register_operand")))]
4873   "TARGET_SIMD"
4874   {
4875     rtx shft
4876       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4877                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4878     emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[0], operands[1],
4879                                                  operands[2], shft));
4880     DONE;
4881   }
4884 (define_expand "aarch64_r<optab>hn<mode>"
4885   [(set (match_operand:<VNARROWQ> 0 "register_operand")
4886         (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4887                     (match_operand:VQN 2 "register_operand")))]
4888   "TARGET_SIMD"
4889   {
4890     rtx shft
4891       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4892                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4893     rtx rnd
4894       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4895         HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4896     emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[0], operands[1],
4897                                                   operands[2], rnd, shft));
4898     DONE;
4899   }
4902 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4903   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4904         (vec_concat:<VNARROWQ2>
4905           (match_operand:<VNARROWQ> 1 "register_operand" "0")
4906           (truncate:<VNARROWQ>
4907             (ashiftrt:VQN
4908               (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4909                           (match_operand:VQN 3 "register_operand" "w"))
4910               (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4911   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4912   "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4913   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4916 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4917   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4918         (vec_concat:<VNARROWQ2>
4919           (match_operand:<VNARROWQ> 1 "register_operand" "0")
4920           (truncate:<VNARROWQ>
4921             (ashiftrt:VQN
4922               (plus:VQN
4923                 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4924                             (match_operand:VQN 3 "register_operand" "w"))
4925                 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4926               (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4927   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4928   "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4929   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4932 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4933   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4934         (vec_concat:<VNARROWQ2>
4935           (truncate:<VNARROWQ>
4936             (ashiftrt:VQN
4937               (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4938                           (match_operand:VQN 3 "register_operand" "w"))
4939               (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))
4940           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4941   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4942   "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4943   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4946 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
4947   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4948         (vec_concat:<VNARROWQ2>
4949           (truncate:<VNARROWQ>
4950             (ashiftrt:VQN
4951               (plus:VQN
4952                 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4953                             (match_operand:VQN 3 "register_operand" "w"))
4954                 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4955               (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))
4956           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4957   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4958   "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4959   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4962 (define_expand "aarch64_<optab>hn2<mode>"
4963   [(match_operand:<VNARROWQ2> 0 "register_operand")
4964    (match_operand:<VNARROWQ> 1 "register_operand")
4965    (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
4966                (match_operand:VQN 3 "register_operand"))]
4967   "TARGET_SIMD"
4968   {
4969     rtx shft
4970       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4971                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4972     if (BYTES_BIG_ENDIAN)
4973       emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[0],
4974                                 operands[1], operands[2], operands[3], shft));
4975     else
4976       emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[0],
4977                                 operands[1], operands[2], operands[3], shft));
4978     DONE;
4979   }
4982 (define_expand "aarch64_r<optab>hn2<mode>"
4983   [(match_operand:<VNARROWQ2> 0 "register_operand")
4984    (match_operand:<VNARROWQ> 1 "register_operand")
4985    (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
4986                (match_operand:VQN 3 "register_operand"))]
4987   "TARGET_SIMD"
4988   {
4989     rtx shft
4990       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4991                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4992     rtx rnd
4993       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4994         HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4995     if (BYTES_BIG_ENDIAN)
4996       emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[0],
4997                                 operands[1], operands[2], operands[3], rnd, shft));
4998     else
4999       emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[0],
5000                                 operands[1], operands[2], operands[3], rnd, shft));
5001     DONE;
5002   }
5005 ;; Optimize ((a + b) >> n) + c where n is half the bitsize of the vector
5006 (define_insn_and_split "*bitmask_shift_plus<mode>"
5007   [(set (match_operand:VQN 0 "register_operand" "=&w")
5008         (plus:VQN
5009           (lshiftrt:VQN
5010             (plus:VQN (match_operand:VQN 1 "register_operand" "w")
5011                       (match_operand:VQN 2 "register_operand" "w"))
5012             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5013           (match_operand:VQN 4 "register_operand" "w")))]
5014   "TARGET_SIMD"
5015   "#"
5016   "&& true"
5017   [(const_int 0)]
5019   rtx tmp;
5020   if (can_create_pseudo_p ())
5021     tmp = gen_reg_rtx (<VNARROWQ>mode);
5022   else
5023     tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[0]));
5024   emit_insn (gen_aarch64_addhn<mode> (tmp, operands[1], operands[2]));
5025   emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[0], operands[4], tmp));
5026   DONE;
5029 ;; pmul.
5031 (define_insn "aarch64_pmul<mode>"
5032   [(set (match_operand:VB 0 "register_operand" "=w")
5033         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
5034                     (match_operand:VB 2 "register_operand" "w")]
5035                    UNSPEC_PMUL))]
5036  "TARGET_SIMD"
5037  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5038   [(set_attr "type" "neon_mul_<Vetype><q>")]
5041 (define_insn "aarch64_pmullv8qi"
5042   [(set (match_operand:V8HI 0 "register_operand" "=w")
5043         (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
5044                       (match_operand:V8QI 2 "register_operand" "w")]
5045                      UNSPEC_PMULL))]
5046  "TARGET_SIMD"
5047  "pmull\\t%0.8h, %1.8b, %2.8b"
5048   [(set_attr "type" "neon_mul_b_long")]
5051 (define_insn "aarch64_pmull_hiv16qi_insn"
5052   [(set (match_operand:V8HI 0 "register_operand" "=w")
5053         (unspec:V8HI
5054           [(vec_select:V8QI
5055              (match_operand:V16QI 1 "register_operand" "w")
5056              (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
5057            (vec_select:V8QI
5058              (match_operand:V16QI 2 "register_operand" "w")
5059              (match_dup 3))]
5060           UNSPEC_PMULL))]
5061  "TARGET_SIMD"
5062  "pmull2\\t%0.8h, %1.16b, %2.16b"
5063   [(set_attr "type" "neon_mul_b_long")]
5066 (define_expand "aarch64_pmull_hiv16qi"
5067   [(match_operand:V8HI 0 "register_operand")
5068    (match_operand:V16QI 1 "register_operand")
5069    (match_operand:V16QI 2 "register_operand")]
5070  "TARGET_SIMD"
5072    rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
5073    emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
5074                                               operands[2], p));
5075    DONE;
5079 ;; fmulx.
5081 (define_insn "aarch64_fmulx<mode>"
5082   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5083         (unspec:VHSDF_HSDF
5084           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5085            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5086            UNSPEC_FMULX))]
5087  "TARGET_SIMD"
5088  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5089  [(set_attr "type" "neon_fp_mul_<stype>")]
5092 ;; vmulxq_lane_f32, and vmulx_laneq_f32
5094 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5095   [(set (match_operand:VDQSF 0 "register_operand" "=w")
5096         (unspec:VDQSF
5097          [(match_operand:VDQSF 1 "register_operand" "w")
5098           (vec_duplicate:VDQSF
5099            (vec_select:<VEL>
5100             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
5101             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5102          UNSPEC_FMULX))]
5103   "TARGET_SIMD"
5104   {
5105     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
5106     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5107   }
5108   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5111 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
5113 (define_insn "*aarch64_mulx_elt<mode>"
5114   [(set (match_operand:VDQF 0 "register_operand" "=w")
5115         (unspec:VDQF
5116          [(match_operand:VDQF 1 "register_operand" "w")
5117           (vec_duplicate:VDQF
5118            (vec_select:<VEL>
5119             (match_operand:VDQF 2 "register_operand" "w")
5120             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5121          UNSPEC_FMULX))]
5122   "TARGET_SIMD"
5123   {
5124     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5125     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5126   }
5127   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5130 ;; vmulxq_lane
5132 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5133   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5134         (unspec:VHSDF
5135          [(match_operand:VHSDF 1 "register_operand" "w")
5136           (vec_duplicate:VHSDF
5137             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5138          UNSPEC_FMULX))]
5139   "TARGET_SIMD"
5140   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
5141   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5144 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5145 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
5146 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
5148 (define_insn "*aarch64_vgetfmulx<mode>"
5149   [(set (match_operand:<VEL> 0 "register_operand" "=w")
5150         (unspec:<VEL>
5151          [(match_operand:<VEL> 1 "register_operand" "w")
5152           (vec_select:<VEL>
5153            (match_operand:VDQF 2 "register_operand" "w")
5154             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5155          UNSPEC_FMULX))]
5156   "TARGET_SIMD"
5157   {
5158     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5159     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5160   }
5161   [(set_attr "type" "fmul<Vetype>")]
5163 ;; <su>q<addsub>
5165 (define_insn "<su_optab>s<addsub><mode>3<vczle><vczbe>"
5166   [(set (match_operand:VSDQ_I_QI_HI 0 "register_operand" "=w")
5167         (BINQOPS:VSDQ_I_QI_HI
5168           (match_operand:VSDQ_I_QI_HI 1 "register_operand" "w")
5169           (match_operand:VSDQ_I_QI_HI 2 "register_operand" "w")))]
5170   "TARGET_SIMD"
5171   "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5172   [(set_attr "type" "neon_q<addsub><q>")]
5175 (define_expand "<su_optab>s<addsub><mode>3"
5176   [(parallel
5177     [(set (match_operand:GPI 0 "register_operand")
5178           (SBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5179                         (match_operand:GPI 2 "aarch64_plus_operand")))
5180     (clobber (scratch:GPI))
5181     (clobber (reg:CC CC_REGNUM))])]
5184 ;; Introducing a temporary GP reg allows signed saturating arithmetic with GPR
5185 ;; operands to be calculated without the use of costly transfers to and from FP
5186 ;; registers.  For example, saturating addition usually uses three FMOVs:
5188 ;;   fmov       d0, x0
5189 ;;   fmov       d1, x1
5190 ;;   sqadd      d0, d0, d1
5191 ;;   fmov       x0, d0
5193 ;; Using a temporary register results in three cheaper instructions being used
5194 ;; in place of the three FMOVs, which calculate the saturating limit accounting
5195 ;; for the signedness of operand2:
5197 ;;   asr        x2, x1, 63
5198 ;;   adds       x0, x0, x1
5199 ;;   eor        x2, x2, 0x8000000000000000
5200 ;;   csinv      x0, x0, x2, vc
5202 ;; If operand2 is a constant value, the temporary register can be used to store
5203 ;; the saturating limit without the need for asr, xor to calculate said limit.
5205 (define_insn_and_split "aarch64_<su_optab>s<addsub><mode>3<vczle><vczbe>"
5206   [(set (match_operand:GPI 0 "register_operand")
5207         (SBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5208                       (match_operand:GPI 2 "aarch64_plus_operand")))
5209     (clobber (match_scratch:GPI 3))
5210     (clobber (reg:CC CC_REGNUM))]
5211   ""
5212   {@ [ cons: =0, 1 , 2   , =3 ; attrs: type       , arch , length ]
5213      [ w       , w , w   , X  ; neon_q<addsub><q> , simd , 4      ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
5214      [ r       , r , JIr , &r ; *                 , *    , 8      ] #
5215   }
5216   "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
5217   [(set (match_dup 0)
5218         (if_then_else:GPI
5219           (match_dup 4)
5220           (match_dup 5)
5221           (match_dup 6)))]
5222   {
5223     if (REG_P (operands[2]))
5224       {
5225       rtx shift_constant = gen_int_mode (GET_MODE_BITSIZE (<MODE>mode) - 1,
5226                                          <MODE>mode);
5227       auto limit = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1);
5228       rtx limit_constant = gen_int_mode (limit, <MODE>mode);
5229       emit_insn (gen_ashr<mode>3 (operands[3], operands[2], shift_constant));
5230       emit_insn (gen_xor<mode>3 (operands[3], operands[3], limit_constant));
5232       switch (<CODE>)
5233         {
5234         case SS_MINUS:
5235           emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
5236                                               operands[2]));
5237         break;
5238         case SS_PLUS:
5239           emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1],
5240                                               operands[2]));
5241           break;
5242         default:
5243           gcc_unreachable ();
5244         }
5246       rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM);
5247       switch (<CODE>)
5248         {
5249         case SS_PLUS:
5250           operands[4] = gen_rtx_NE (<MODE>mode, ccin, const0_rtx);
5251           operands[5] = gen_rtx_NOT (<MODE>mode, operands[3]);
5252           operands[6] = operands[0];
5253           break;
5254         case SS_MINUS:
5255           operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx);
5256           operands[5] = operands[0];
5257           operands[6] = operands[3];
5258           break;
5259         default:
5260           gcc_unreachable ();
5261         }
5262       }
5263     else
5264       {
5265         auto imm = INTVAL (operands[2]);
5266         rtx neg_imm = gen_int_mode (-imm, <MODE>mode);
5267         wide_int limit;
5269         switch (<CODE>)
5270           {
5271           case SS_MINUS:
5272             emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5273                                                     operands[2], neg_imm));
5274             limit = imm >= 0 ? wi::min_value (<MODE>mode, SIGNED)
5275                              : wi::max_value (<MODE>mode, SIGNED);
5276             break;
5277           case SS_PLUS:
5278             emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5279                                                     neg_imm, operands[2]));
5280             limit = imm >= 0 ? wi::max_value (<MODE>mode, SIGNED)
5281                              : wi::min_value (<MODE>mode, SIGNED);
5282             break;
5283           default:
5284             gcc_unreachable ();
5285           }
5287       rtx sat_limit = immed_wide_int_const (limit, <MODE>mode);
5288       emit_insn (gen_rtx_SET (operands[3], sat_limit));
5290       rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM);
5291       operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx);
5292       operands[5] = operands[0];
5293       operands[6] = operands[3];
5294       }
5295   }
5298 ;; Unsigned saturating arithmetic with GPR operands can be optimised similarly
5299 ;; to the signed case, albeit without the need for a temporary register as the
5300 ;; saturating limit can be inferred from the <addsub> code.  This applies only
5301 ;; to SImode and DImode.
5303 (define_insn_and_split "<su_optab>s<addsub><mode>3<vczle><vczbe>"
5304   [(set (match_operand:GPI 0 "register_operand")
5305         (UBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5306                       (match_operand:GPI 2 "aarch64_plus_operand")))
5307     (clobber (reg:CC CC_REGNUM))]
5308   ""
5309   {@ [ cons: =0, 1 , 2   ; attrs: type       , arch , length ]
5310      [ w       , w , w   ; neon_q<addsub><q> , simd , 4      ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
5311      [ r       , r , JIr ; *                 , *    , 8      ] #
5312   }
5313   "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
5314   [(set (match_dup 0)
5315         (if_then_else:GPI
5316           (match_dup 3)
5317           (match_dup 0)
5318           (match_dup 4)))]
5319   {
5321     if (REG_P (operands[2]))
5322       {
5323         switch (<CODE>)
5324           {
5325           case US_MINUS:
5326             emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
5327                                                 operands[2]));
5328             break;
5329           case US_PLUS:
5330             emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1],
5331                                                 operands[2]));
5332             break;
5333           default:
5334             gcc_unreachable ();
5335           }
5336       }
5337     else
5338       {
5339         auto imm = UINTVAL (operands[2]);
5340         rtx neg_imm = gen_int_mode (-imm, <MODE>mode);
5341         switch (<CODE>)
5342           {
5343           case US_MINUS:
5344             emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5345                                                     operands[2], neg_imm));
5346             break;
5347           case US_PLUS:
5348             emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5349                                                     neg_imm, operands[2]));
5350             break;
5351           default:
5352             gcc_unreachable ();
5353           }
5354       }
5356     rtx ccin = gen_rtx_REG (CCmode, CC_REGNUM);
5357     switch (<CODE>)
5358       {
5359       case US_PLUS:
5360         operands[3] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx);
5361         operands[4] = gen_int_mode (-1, <MODE>mode);
5362         break;
5363       case US_MINUS:
5364         operands[3] = gen_rtx_GEU (<MODE>mode, ccin, const0_rtx);
5365         operands[4] = const0_rtx;
5366         break;
5367       default:
5368         gcc_unreachable ();
5369       }
5370   }
5373 ;; suqadd and usqadd
5375 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5376   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5377         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5378                         (match_operand:VSDQ_I 2 "register_operand" "w")]
5379                        USSUQADD))]
5380   "TARGET_SIMD"
5381   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5382   [(set_attr "type" "neon_qadd<q>")]
5385 ;; sqmovn and uqmovn
5387 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5388   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5389         (SAT_TRUNC:<VNARROWQ>
5390           (match_operand:SD_HSDI 1 "register_operand" "w")))]
5391   "TARGET_SIMD"
5392   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5393   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5396 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5397   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5398         (SAT_TRUNC:<VNARROWQ>
5399           (match_operand:VQN 1 "register_operand" "w")))]
5400   "TARGET_SIMD"
5401   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5402   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; SQXTN2/UQXTN2: narrow the VQN source into the HIGH half of the
;; destination while keeping operand 1 as the low half.  Modelled as a
;; vec_concat, whose operand order depends on endianness, hence the
;; separate little-endian pattern here ...
5405 (define_insn "aarch64_<su>qxtn2<mode>_le"
5406   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5407         (vec_concat:<VNARROWQ2>
5408           (match_operand:<VNARROWQ> 1 "register_operand" "0")
5409           (SAT_TRUNC:<VNARROWQ>
5410             (match_operand:VQN 2 "register_operand" "w"))))]
5411   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5412   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5413    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; ... and the big-endian pattern with the vec_concat operands swapped.
5416 (define_insn "aarch64_<su>qxtn2<mode>_be"
5417   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5418         (vec_concat:<VNARROWQ2>
5419           (SAT_TRUNC:<VNARROWQ>
5420             (match_operand:VQN 2 "register_operand" "w"))
5421           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5422   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5423   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5424    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Expander used by the intrinsics: pick the _le or _be insn above
;; according to the target endianness.
5427 (define_expand "aarch64_<su>qxtn2<mode>"
5428   [(match_operand:<VNARROWQ2> 0 "register_operand")
5429    (match_operand:<VNARROWQ> 1 "register_operand")
5430    (SAT_TRUNC:<VNARROWQ>
5431      (match_operand:VQN 2 "register_operand"))]
5432   "TARGET_SIMD"
5433   {
5434     if (BYTES_BIG_ENDIAN)
5435       emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5436                                                  operands[2]));
5437     else
5438       emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5439                                                  operands[2]));
5440     DONE;
5441   }
5444 ;; sqmovun
;; SQXTUN: signed-to-unsigned saturating narrow.  The RTL clamps the
;; signed source into [0, half_mask] (smax with 0, smin with the
;; unsigned max of the narrow element) before truncating, which is
;; exactly the saturation SQXTUN performs.  Scalar (SD_HSDI) form first.
5446 (define_insn "aarch64_sqmovun<mode>"
5447   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5448         (truncate:<VNARROWQ>
5449           (smin:SD_HSDI
5450             (smax:SD_HSDI
5451               (match_operand:SD_HSDI 1 "register_operand" "w")
5452               (const_int 0))
5453             (const_int <half_mask>))))]
5454    "TARGET_SIMD"
5455    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5456    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Vector form; operands 2 and 3 match the zero and narrow-max constants
;; so combine can recognise open-coded clamps as SQXTUN.
5459 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5460   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5461         (truncate:<VNARROWQ>
5462           (smin:VQN
5463             (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5464                       (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
5465             (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
5466   "TARGET_SIMD"
5467   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5468   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Expander that materialises the clamp constants (zero and the unsigned
;; max of the narrow element) for the vector insn above.
5471 (define_expand "aarch64_sqmovun<mode>"
5472   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5473         (truncate:<VNARROWQ>
5474           (smin:VQN
5475             (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5476                       (match_dup 2))
5477             (match_dup 3))))]
5478   "TARGET_SIMD"
5479   {
5480     operands[2] = CONST0_RTX (<MODE>mode);
5481     operands[3]
5482       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5483                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5484   }
;; SQXTUN2: as SQXTUN above, but writing the high half of the
;; destination and keeping operand 1 as the low half.  Little-endian
;; vec_concat order ...
5487 (define_insn "aarch64_sqxtun2<mode>_le"
5488   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5489         (vec_concat:<VNARROWQ2>
5490           (match_operand:<VNARROWQ> 1 "register_operand" "0")
5491           (truncate:<VNARROWQ>
5492             (smin:VQN
5493               (smax:VQN
5494                 (match_operand:VQN 2 "register_operand" "w")
5495                 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5496               (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
5497   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5498   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5499    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; ... and big-endian order.
5502 (define_insn "aarch64_sqxtun2<mode>_be"
5503   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5504         (vec_concat:<VNARROWQ2>
5505           (truncate:<VNARROWQ>
5506             (smin:VQN
5507               (smax:VQN
5508                 (match_operand:VQN 2 "register_operand" "w")
5509                 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5510               (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
5511           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5512   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5513   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5514    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Expander: build the zero and narrow-max clamp constants, then emit
;; the endianness-appropriate insn.
5517 (define_expand "aarch64_sqxtun2<mode>"
5518   [(match_operand:<VNARROWQ2> 0 "register_operand")
5519    (match_operand:<VNARROWQ> 1 "register_operand")
5520    (match_operand:VQN 2 "register_operand")]
5521   "TARGET_SIMD"
5522   {
5523     rtx zeros = CONST0_RTX (<MODE>mode);
5524     rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5525                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5526     if (BYTES_BIG_ENDIAN)
5527       emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5528                                                operands[2], zeros, half_umax));
5529     else
5530       emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5531                                                operands[2], zeros, half_umax));
5532     DONE;
5533   }
5536 ;; <su>q<absneg>
;; Saturating unary operations.  UNQOPS iterates over the saturating
;; abs/neg rtx codes and <optab> names them, so this single pattern
;; emits the s<optab> instructions (SQABS/SQNEG per the section comment)
;; for all integer vector and scalar modes in VSDQ_I.
5538 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5539   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5540         (UNQOPS:VSDQ_I
5541           (match_operand:VSDQ_I 1 "register_operand" "w")))]
5542   "TARGET_SIMD"
5543   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5544   [(set_attr "type" "neon_<optab><q>")]
5547 ;; sq<r>dmulh.
;; Saturating doubling multiply high-half, with and without rounding
;; (the VQDMULH unspec iterator covers both; <r> selects the "r" in the
;; mnemonic).  Element-by-element form for all VSDQ_HSI modes.
5549 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5550   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5551         (unspec:VSDQ_HSI
5552           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5553            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5554          VQDMULH))]
5555   "TARGET_SIMD"
5556   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5557   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; _n form: operand 2 is a scalar broadcast to every lane
;; (vec_duplicate), emitted as a by-element multiply with lane 0.
5560 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5561   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5562         (unspec:VDQHS
5563           [(match_operand:VDQHS 1 "register_operand" "w")
5564            (vec_duplicate:VDQHS
5565              (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5566          VQDMULH))]
5567   "TARGET_SIMD"
5568   "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5569   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5572 ;; sq<r>dmulh_lane
;; By-lane forms of sq<r>dmulh.  "_lane" takes the lane from a 64-bit
;; vector (<VCOND>), "_laneq" from a 128-bit vector (<VCONQ>).  Each
;; output template first remaps the lane number for the target
;; endianness via aarch64_endian_lane_rtx.  Vector destination, 64-bit
;; lane source:
5574 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5575   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5576         (unspec:VDQHS
5577           [(match_operand:VDQHS 1 "register_operand" "w")
5578            (vec_select:<VEL>
5579              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5580              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5581          VQDMULH))]
5582   "TARGET_SIMD"
5583   "*
5584    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5585    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5586   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Vector destination, 128-bit lane source.
5589 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5590   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5591         (unspec:VDQHS
5592           [(match_operand:VDQHS 1 "register_operand" "w")
5593            (vec_select:<VEL>
5594              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5595              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5596          VQDMULH))]
5597   "TARGET_SIMD"
5598   "*
5599    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5600    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5601   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar (SD_HSI) destination, 64-bit lane source.  Note the scalar
;; register syntax %<v>0/%<v>1 and the <v> element suffix on the lane.
5604 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5605   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5606         (unspec:SD_HSI
5607           [(match_operand:SD_HSI 1 "register_operand" "w")
5608            (vec_select:<VEL>
5609              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5610              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5611          VQDMULH))]
5612   "TARGET_SIMD"
5613   "*
5614    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5615    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5616   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; Scalar destination, 128-bit lane source.
5619 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5620   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5621         (unspec:SD_HSI
5622           [(match_operand:SD_HSI 1 "register_operand" "w")
5623            (vec_select:<VEL>
5624              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5625              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5626          VQDMULH))]
5627   "TARGET_SIMD"
5628   "*
5629    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5630    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5631   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5634 ;; sqrdml[as]h.
;; Rounding-doubling multiply accumulate/subtract high-half
;; (SQRDMLAH/SQRDMLSH, selected by the SQRDMLH_AS iterator's rdma_as
;; attribute).  Operand 1 is the accumulator, tied to the destination
;; ("0" constraint).  Requires the RDMA extension (TARGET_SIMD_RDMA).
5636 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5637   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5638         (unspec:VSDQ_HSI
5639           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5640            (match_operand:VSDQ_HSI 2 "register_operand" "w")
5641            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5642           SQRDMLH_AS))]
5643    "TARGET_SIMD_RDMA"
5644    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5645    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5648 ;; sqrdml[as]h_lane.
;; By-lane SQRDMLAH/SQRDMLSH with the lane taken from a 64-bit vector
;; (<VCOND>); the lane index is endian-adjusted before printing.
;; Vector (VDQHS) destination form:
5650 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5651   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5652         (unspec:VDQHS
5653           [(match_operand:VDQHS 1 "register_operand" "0")
5654            (match_operand:VDQHS 2 "register_operand" "w")
5655            (vec_select:<VEL>
5656              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5657              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5658           SQRDMLH_AS))]
5659    "TARGET_SIMD_RDMA"
5660    {
5661      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5662      return
5663       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5664    }
5665    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) destination form of the same by-lane operation.
5668 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5669   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5670         (unspec:SD_HSI
5671           [(match_operand:SD_HSI 1 "register_operand" "0")
5672            (match_operand:SD_HSI 2 "register_operand" "w")
5673            (vec_select:<VEL>
5674              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5675              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5676           SQRDMLH_AS))]
5677    "TARGET_SIMD_RDMA"
5678    {
5679      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5680      return
5681       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5682    }
5683    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5686 ;; sqrdml[as]h_laneq.
;; As the _lane patterns above, but the lane comes from a 128-bit
;; vector (<VCONQ>).  Vector (VDQHS) destination form:
5688 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5689   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5690         (unspec:VDQHS
5691           [(match_operand:VDQHS 1 "register_operand" "0")
5692            (match_operand:VDQHS 2 "register_operand" "w")
5693            (vec_select:<VEL>
5694              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5695              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5696           SQRDMLH_AS))]
5697    "TARGET_SIMD_RDMA"
5698    {
5699      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5700      return
5701       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5702    }
5703    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Scalar (SD_HSI) destination form.
5706 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5707   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5708         (unspec:SD_HSI
5709           [(match_operand:SD_HSI 1 "register_operand" "0")
5710            (match_operand:SD_HSI 2 "register_operand" "w")
5711            (vec_select:<VEL>
5712              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5713              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5714           SQRDMLH_AS))]
5715    "TARGET_SIMD_RDMA"
5716    {
5717      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5718      return
5719       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5720    }
5721    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5724 ;; vqdml[sa]l
;; Saturating doubling multiply-accumulate long.  The RTL models
;; SQDMLAL as: widen both sources (sign_extend to <VWIDE>), multiply,
;; saturating-double via ss_ashift by 1, then saturating-add into the
;; accumulator (operand 1, tied to the destination).
5726 (define_insn "aarch64_sqdmlal<mode>"
5727   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5728         (ss_plus:<VWIDE>
5729           (ss_ashift:<VWIDE>
5730               (mult:<VWIDE>
5731                 (sign_extend:<VWIDE>
5732                       (match_operand:VSD_HSI 2 "register_operand" "w"))
5733                 (sign_extend:<VWIDE>
5734                       (match_operand:VSD_HSI 3 "register_operand" "w")))
5735               (const_int 1))
5736           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5737   "TARGET_SIMD"
5738   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5739   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; SQDMLSL: the same doubled widening product, saturating-subtracted
;; from the accumulator.
5742 (define_insn "aarch64_sqdmlsl<mode>"
5743   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5744         (ss_minus:<VWIDE>
5745           (match_operand:<VWIDE> 1 "register_operand" "0")
5746           (ss_ashift:<VWIDE>
5747               (mult:<VWIDE>
5748                 (sign_extend:<VWIDE>
5749                       (match_operand:VSD_HSI 2 "register_operand" "w"))
5750                 (sign_extend:<VWIDE>
5751                       (match_operand:VSD_HSI 3 "register_operand" "w")))
5752               (const_int 1))))]
5753   "TARGET_SIMD"
5754   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5755   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5758 ;; vqdml[sa]l_lane
;; By-lane SQDMLAL/SQDMLSL for 64-bit vector sources (VD_HSI).  The
;; second multiplicand is one lane of a vector (vec_select), broadcast
;; to every element (vec_duplicate) before the widening multiply.
;; "_lane" selects from a 64-bit vector (<VCOND>), "_laneq" from a
;; 128-bit vector (<VCONQ>); all templates endian-adjust the lane index.
5760 (define_insn "aarch64_sqdmlal_lane<mode>"
5761   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5762         (ss_plus:<VWIDE>
5763           (ss_ashift:<VWIDE>
5764             (mult:<VWIDE>
5765               (sign_extend:<VWIDE>
5766                 (match_operand:VD_HSI 2 "register_operand" "w"))
5767               (vec_duplicate:<VWIDE>
5768                 (sign_extend:<VWIDE_S>
5769                   (vec_select:<VEL>
5770                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5771                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5772               ))
5773             (const_int 1))
5774           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5775   "TARGET_SIMD"
5776   {
5777     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5778     return
5779       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5780   }
5781   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Subtract form, 64-bit lane vector.
5784 (define_insn "aarch64_sqdmlsl_lane<mode>"
5785   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5786         (ss_minus:<VWIDE>
5787           (match_operand:<VWIDE> 1 "register_operand" "0")
5788           (ss_ashift:<VWIDE>
5789             (mult:<VWIDE>
5790               (sign_extend:<VWIDE>
5791                 (match_operand:VD_HSI 2 "register_operand" "w"))
5792               (vec_duplicate:<VWIDE>
5793                 (sign_extend:<VWIDE_S>
5794                   (vec_select:<VEL>
5795                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5796                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5797               ))
5798             (const_int 1))))]
5799   "TARGET_SIMD"
5800   {
5801     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5802     return
5803       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5804   }
5805   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Subtract form, 128-bit lane vector.
5809 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5810   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5811         (ss_minus:<VWIDE>
5812           (match_operand:<VWIDE> 1 "register_operand" "0")
5813           (ss_ashift:<VWIDE>
5814             (mult:<VWIDE>
5815               (sign_extend:<VWIDE>
5816                 (match_operand:VD_HSI 2 "register_operand" "w"))
5817               (vec_duplicate:<VWIDE>
5818                 (sign_extend:<VWIDE_S>
5819                   (vec_select:<VEL>
5820                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5821                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5822               ))
5823             (const_int 1))))]
5824   "TARGET_SIMD"
5825   {
5826     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5827     return
5828       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5829   }
5830   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulate form, 128-bit lane vector.
5833 (define_insn "aarch64_sqdmlal_laneq<mode>"
5834   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5835         (ss_plus:<VWIDE>
5836           (ss_ashift:<VWIDE>
5837             (mult:<VWIDE>
5838               (sign_extend:<VWIDE>
5839                 (match_operand:VD_HSI 2 "register_operand" "w"))
5840               (vec_duplicate:<VWIDE>
5841                 (sign_extend:<VWIDE_S>
5842                   (vec_select:<VEL>
5843                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5844                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5845               ))
5846             (const_int 1))
5847           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5848   "TARGET_SIMD"
5849   {
5850     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5851     return
5852       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5853   }
5854   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; By-lane SQDMLAL/SQDMLSL for scalar sources (SD_HSI).  Unlike the
;; vector patterns above there is no vec_duplicate: the selected lane is
;; widened directly and multiplied with the widened scalar operand 2.
;; "_lane" selects from a 64-bit vector, "_laneq" from a 128-bit one.
5858 (define_insn "aarch64_sqdmlal_lane<mode>"
5859   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5860         (ss_plus:<VWIDE>
5861           (ss_ashift:<VWIDE>
5862             (mult:<VWIDE>
5863               (sign_extend:<VWIDE>
5864                 (match_operand:SD_HSI 2 "register_operand" "w"))
5865               (sign_extend:<VWIDE>
5866                 (vec_select:<VEL>
5867                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5868                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5869               )
5870             (const_int 1))
5871           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5872   "TARGET_SIMD"
5873   {
5874     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5875     return
5876       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5877   }
5878   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Subtract form, 64-bit lane vector.
5881 (define_insn "aarch64_sqdmlsl_lane<mode>"
5882   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5883         (ss_minus:<VWIDE>
5884           (match_operand:<VWIDE> 1 "register_operand" "0")
5885           (ss_ashift:<VWIDE>
5886             (mult:<VWIDE>
5887               (sign_extend:<VWIDE>
5888                 (match_operand:SD_HSI 2 "register_operand" "w"))
5889               (sign_extend:<VWIDE>
5890                 (vec_select:<VEL>
5891                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5892                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5893               )
5894             (const_int 1))))]
5895   "TARGET_SIMD"
5896   {
5897     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5898     return
5899       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5900   }
5901   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulate form, 128-bit lane vector.
5905 (define_insn "aarch64_sqdmlal_laneq<mode>"
5906   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5907         (ss_plus:<VWIDE>
5908           (ss_ashift:<VWIDE>
5909             (mult:<VWIDE>
5910               (sign_extend:<VWIDE>
5911                 (match_operand:SD_HSI 2 "register_operand" "w"))
5912               (sign_extend:<VWIDE>
5913                 (vec_select:<VEL>
5914                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5915                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5916               )
5917             (const_int 1))
5918           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5919   "TARGET_SIMD"
5920   {
5921     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5922     return
5923       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5924   }
5925   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Subtract form, 128-bit lane vector.
5928 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5929   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5930         (ss_minus:<VWIDE>
5931           (match_operand:<VWIDE> 1 "register_operand" "0")
5932           (ss_ashift:<VWIDE>
5933             (mult:<VWIDE>
5934               (sign_extend:<VWIDE>
5935                 (match_operand:SD_HSI 2 "register_operand" "w"))
5936               (sign_extend:<VWIDE>
5937                 (vec_select:<VEL>
5938                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5939                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5940               )
5941             (const_int 1))))]
5942   "TARGET_SIMD"
5943   {
5944     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5945     return
5946       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5947   }
5948   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5951 ;; vqdml[sa]l_n
;; _n forms: the second multiplicand is a scalar element broadcast to
;; all lanes (vec_duplicate of a widened <VEL>), printed as a
;; by-element multiply with lane 0.  Subtract form:
5953 (define_insn "aarch64_sqdmlsl_n<mode>"
5954   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5955         (ss_minus:<VWIDE>
5956           (match_operand:<VWIDE> 1 "register_operand" "0")
5957           (ss_ashift:<VWIDE>
5958               (mult:<VWIDE>
5959                 (sign_extend:<VWIDE>
5960                       (match_operand:VD_HSI 2 "register_operand" "w"))
5961                 (vec_duplicate:<VWIDE>
5962                   (sign_extend:<VWIDE_S>
5963                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5964               (const_int 1))))]
5965   "TARGET_SIMD"
5966   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5967   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulate form.
5970 (define_insn "aarch64_sqdmlal_n<mode>"
5971   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5972         (ss_plus:<VWIDE>
5973           (ss_ashift:<VWIDE>
5974               (mult:<VWIDE>
5975                 (sign_extend:<VWIDE>
5976                       (match_operand:VD_HSI 2 "register_operand" "w"))
5977                 (vec_duplicate:<VWIDE>
5978                   (sign_extend:<VWIDE_S>
5979                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5980               (const_int 1))
5981           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5982   "TARGET_SIMD"
5983   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5984   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5988 ;; sqdml[as]l2
;; "2" variants operate on the HIGH halves of 128-bit sources: each
;; source is narrowed with a vec_select using a vect_par_cnst_hi_half
;; parallel (operand 4) before being widened and multiplied.
5990 (define_insn "aarch64_sqdmlal2<mode>_internal"
5991   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5992         (ss_plus:<VWIDE>
5993          (ss_ashift:<VWIDE>
5994              (mult:<VWIDE>
5995                (sign_extend:<VWIDE>
5996                  (vec_select:<VHALF>
5997                      (match_operand:VQ_HSI 2 "register_operand" "w")
5998                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5999                (sign_extend:<VWIDE>
6000                  (vec_select:<VHALF>
6001                      (match_operand:VQ_HSI 3 "register_operand" "w")
6002                      (match_dup 4))))
6003              (const_int 1))
6004           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6005   "TARGET_SIMD"
6006   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
6007   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; High-half subtract form.
6010 (define_insn "aarch64_sqdmlsl2<mode>_internal"
6011   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6012         (ss_minus:<VWIDE>
6013          (match_operand:<VWIDE> 1 "register_operand" "0")
6014          (ss_ashift:<VWIDE>
6015              (mult:<VWIDE>
6016                (sign_extend:<VWIDE>
6017                  (vec_select:<VHALF>
6018                      (match_operand:VQ_HSI 2 "register_operand" "w")
6019                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6020                (sign_extend:<VWIDE>
6021                  (vec_select:<VHALF>
6022                      (match_operand:VQ_HSI 3 "register_operand" "w")
6023                      (match_dup 4))))
6024              (const_int 1))))]
6025   "TARGET_SIMD"
6026   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
6027   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: build the high-half selector parallel and forward to the
;; _internal insn; SBINQOPS covers the add (sqdmlal2) and subtract
;; (sqdmlsl2) variants via <as>.
6030 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
6031   [(match_operand:<VWIDE> 0 "register_operand")
6032    (SBINQOPS:<VWIDE>
6033      (match_operand:<VWIDE> 1 "register_operand")
6034      (match_dup 1))
6035    (match_operand:VQ_HSI 2 "register_operand")
6036    (match_operand:VQ_HSI 3 "register_operand")]
6037   "TARGET_SIMD"
6039   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6040   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
6041                                                 operands[1], operands[2],
6042                                                 operands[3], p));
6043   DONE;
6046 ;; vqdml[sa]l2_lane
;; High-half by-lane forms: operand 2's high half (selected by the
;; vect_par_cnst_hi_half parallel, operand 5) is multiplied by a single
;; broadcast lane of operand 3.  The lane index (operand 4) is
;; endian-adjusted before printing.  Subtract, 64-bit lane vector:
6048 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
6049   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6050         (ss_minus:<VWIDE>
6051           (match_operand:<VWIDE> 1 "register_operand" "0")
6052           (ss_ashift:<VWIDE>
6053               (mult:<VWIDE>
6054                 (sign_extend:<VWIDE>
6055                   (vec_select:<VHALF>
6056                     (match_operand:VQ_HSI 2 "register_operand" "w")
6057                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6058                 (vec_duplicate:<VWIDE>
6059                   (sign_extend:<VWIDE_S>
6060                     (vec_select:<VEL>
6061                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
6062                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6063                     ))))
6064               (const_int 1))))]
6065   "TARGET_SIMD"
6066   {
6067     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
6068     return
6069      "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6070   }
6071   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulate, 64-bit lane vector.
6074 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
6075   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6076         (ss_plus:<VWIDE>
6077           (ss_ashift:<VWIDE>
6078               (mult:<VWIDE>
6079                 (sign_extend:<VWIDE>
6080                   (vec_select:<VHALF>
6081                     (match_operand:VQ_HSI 2 "register_operand" "w")
6082                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6083                 (vec_duplicate:<VWIDE>
6084                   (sign_extend:<VWIDE_S>
6085                     (vec_select:<VEL>
6086                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
6087                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6088                     ))))
6089               (const_int 1))
6090           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6091   "TARGET_SIMD"
6092   {
6093     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
6094     return
6095      "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6096   }
6097   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Subtract, 128-bit lane vector (<VCONQ>).
6100 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
6101   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6102         (ss_minus:<VWIDE>
6103           (match_operand:<VWIDE> 1 "register_operand" "0")
6104           (ss_ashift:<VWIDE>
6105               (mult:<VWIDE>
6106                 (sign_extend:<VWIDE>
6107                   (vec_select:<VHALF>
6108                     (match_operand:VQ_HSI 2 "register_operand" "w")
6109                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6110                 (vec_duplicate:<VWIDE>
6111                   (sign_extend:<VWIDE_S>
6112                     (vec_select:<VEL>
6113                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
6114                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6115                     ))))
6116               (const_int 1))))]
6117   "TARGET_SIMD"
6118   {
6119     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
6120     return
6121      "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6122   }
6123   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulate, 128-bit lane vector.
6126 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
6127   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6128         (ss_plus:<VWIDE>
6129           (ss_ashift:<VWIDE>
6130               (mult:<VWIDE>
6131                 (sign_extend:<VWIDE>
6132                   (vec_select:<VHALF>
6133                     (match_operand:VQ_HSI 2 "register_operand" "w")
6134                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6135                 (vec_duplicate:<VWIDE>
6136                   (sign_extend:<VWIDE_S>
6137                     (vec_select:<VEL>
6138                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
6139                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6140                     ))))
6141               (const_int 1))
6142           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6143   "TARGET_SIMD"
6144   {
6145     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
6146     return
6147      "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6148   }
6149   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expanders for the by-lane "2" variants: build the high-half selector
;; parallel and forward all operands to the matching _internal insn.
;; 64-bit lane-vector (_lane) version:
6152 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
6153   [(match_operand:<VWIDE> 0 "register_operand")
6154    (SBINQOPS:<VWIDE>
6155      (match_operand:<VWIDE> 1 "register_operand")
6156      (match_dup 1))
6157    (match_operand:VQ_HSI 2 "register_operand")
6158    (match_operand:<VCOND> 3 "register_operand")
6159    (match_operand:SI 4 "immediate_operand")]
6160   "TARGET_SIMD"
6162   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6163   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
6164                                                 operands[1], operands[2],
6165                                                 operands[3], operands[4], p));
6166   DONE;
;; 128-bit lane-vector (_laneq) version.
6169 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
6170   [(match_operand:<VWIDE> 0 "register_operand")
6171    (SBINQOPS:<VWIDE>
6172      (match_operand:<VWIDE> 1 "register_operand")
6173      (match_dup 1))
6174    (match_operand:VQ_HSI 2 "register_operand")
6175    (match_operand:<VCONQ> 3 "register_operand")
6176    (match_operand:SI 4 "immediate_operand")]
6177   "TARGET_SIMD"
6179   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6180   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
6181                                                 operands[1], operands[2],
6182                                                 operands[3], operands[4], p));
6183   DONE;
;; High-half _n forms: operand 2's high half is multiplied by a scalar
;; element broadcast to all lanes, printed as a by-element multiply
;; with lane 0.  Subtract form:
6186 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
6187   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6188         (ss_minus:<VWIDE>
6189           (match_operand:<VWIDE> 1 "register_operand" "0")
6190           (ss_ashift:<VWIDE>
6191             (mult:<VWIDE>
6192               (sign_extend:<VWIDE>
6193                 (vec_select:<VHALF>
6194                   (match_operand:VQ_HSI 2 "register_operand" "w")
6195                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6196               (vec_duplicate:<VWIDE>
6197                 (sign_extend:<VWIDE_S>
6198                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6199             (const_int 1))))]
6200   "TARGET_SIMD"
6201   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6202   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Accumulate form.
6205 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
6206   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6207         (ss_plus:<VWIDE>
6208           (ss_ashift:<VWIDE>
6209             (mult:<VWIDE>
6210               (sign_extend:<VWIDE>
6211                 (vec_select:<VHALF>
6212                   (match_operand:VQ_HSI 2 "register_operand" "w")
6213                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6214               (vec_duplicate:<VWIDE>
6215                 (sign_extend:<VWIDE_S>
6216                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6217             (const_int 1))
6218           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6219   "TARGET_SIMD"
6220   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6221   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander: build the high-half selector parallel and forward to the
;; matching _n _internal insn.
6224 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
6225   [(match_operand:<VWIDE> 0 "register_operand")
6226    (SBINQOPS:<VWIDE>
6227      (match_operand:<VWIDE> 1 "register_operand")
6228      (match_dup 1))
6229    (match_operand:VQ_HSI 2 "register_operand")
6230    (match_operand:<VEL> 3 "register_operand")]
6231   "TARGET_SIMD"
6233   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6234   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
6235                                                 operands[1], operands[2],
6236                                                 operands[3], p));
6237   DONE;
6240 ;; vqdmull
;; SQDMULL: sign-extend both (64-bit or scalar) inputs to the wide mode,
;; multiply, then double with saturation (ss_ashift by const_int 1).
6242 (define_insn "aarch64_sqdmull<mode>"
6243   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6244         (ss_ashift:<VWIDE>
6245              (mult:<VWIDE>
6246                (sign_extend:<VWIDE>
6247                      (match_operand:VSD_HSI 1 "register_operand" "w"))
6248                (sign_extend:<VWIDE>
6249                      (match_operand:VSD_HSI 2 "register_operand" "w")))
6250              (const_int 1)))]
6251   "TARGET_SIMD"
6252   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6253   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
6256 ;; vqdmull_lane
;; SQDMULL by lane, vector form: the multiplier is one element selected
;; from a 64-bit vector (<VCOND>), duplicated across the wide mode.  The
;; lane number is converted to the architectural (endian-adjusted) lane
;; in the C output block via aarch64_endian_lane_rtx.
6258 (define_insn "aarch64_sqdmull_lane<mode>"
6259   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6260         (ss_ashift:<VWIDE>
6261              (mult:<VWIDE>
6262                (sign_extend:<VWIDE>
6263                  (match_operand:VD_HSI 1 "register_operand" "w"))
6264                (vec_duplicate:<VWIDE>
6265                  (sign_extend:<VWIDE_S>
6266                    (vec_select:<VEL>
6267                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6268                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6269                ))
6270              (const_int 1)))]
6271   "TARGET_SIMD"
6272   {
6273     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6274     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6275   }
6276   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above, but the lane is selected from a 128-bit vector (<VCONQ>).
6279 (define_insn "aarch64_sqdmull_laneq<mode>"
6280   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6281         (ss_ashift:<VWIDE>
6282              (mult:<VWIDE>
6283                (sign_extend:<VWIDE>
6284                  (match_operand:VD_HSI 1 "register_operand" "w"))
6285                (vec_duplicate:<VWIDE>
6286                  (sign_extend:<VWIDE_S>
6287                    (vec_select:<VEL>
6288                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6289                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6290                ))
6291              (const_int 1)))]
6292   "TARGET_SIMD"
6293   {
6294     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6295     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6296   }
6297   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; SQDMULL by lane, scalar form (SD_HSI): the result is a single wide
;; element, so no vec_duplicate is needed around the selected lane.
6300 (define_insn "aarch64_sqdmull_lane<mode>"
6301   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6302         (ss_ashift:<VWIDE>
6303              (mult:<VWIDE>
6304                (sign_extend:<VWIDE>
6305                  (match_operand:SD_HSI 1 "register_operand" "w"))
6306                (sign_extend:<VWIDE>
6307                  (vec_select:<VEL>
6308                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6309                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6310                ))
6311              (const_int 1)))]
6312   "TARGET_SIMD"
6313   {
6314     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6315     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6316   }
6317   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Scalar form with the lane taken from a 128-bit vector.
6320 (define_insn "aarch64_sqdmull_laneq<mode>"
6321   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6322         (ss_ashift:<VWIDE>
6323              (mult:<VWIDE>
6324                (sign_extend:<VWIDE>
6325                  (match_operand:SD_HSI 1 "register_operand" "w"))
6326                (sign_extend:<VWIDE>
6327                  (vec_select:<VEL>
6328                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6329                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6330                ))
6331              (const_int 1)))]
6332   "TARGET_SIMD"
6333   {
6334     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6335     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6336   }
6337   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6340 ;; vqdmull_n
;; SQDMULL by scalar element: the scalar operand 2 is duplicated across the
;; wide mode; the assembly always references lane [0] of the scalar register.
6342 (define_insn "aarch64_sqdmull_n<mode>"
6343   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6344         (ss_ashift:<VWIDE>
6345              (mult:<VWIDE>
6346                (sign_extend:<VWIDE>
6347                  (match_operand:VD_HSI 1 "register_operand" "w"))
6348                (vec_duplicate:<VWIDE>
6349                  (sign_extend:<VWIDE_S>
6350                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6351                )
6352              (const_int 1)))]
6353   "TARGET_SIMD"
6354   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6355   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6358 ;; vqdmull2
;; SQDMULL2: like SQDMULL but operating on the high halves of two 128-bit
;; inputs; both operands are narrowed with the same hi-half selector
;; (operand 3 / match_dup 3).
6360 (define_insn "aarch64_sqdmull2<mode>_internal"
6361   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6362         (ss_ashift:<VWIDE>
6363              (mult:<VWIDE>
6364                (sign_extend:<VWIDE>
6365                  (vec_select:<VHALF>
6366                    (match_operand:VQ_HSI 1 "register_operand" "w")
6367                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6368                (sign_extend:<VWIDE>
6369                  (vec_select:<VHALF>
6370                    (match_operand:VQ_HSI 2 "register_operand" "w")
6371                    (match_dup 3)))
6372                )
6373              (const_int 1)))]
6374   "TARGET_SIMD"
6375   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6376   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: supply the hi-half parallel for the internal pattern above.
6379 (define_expand "aarch64_sqdmull2<mode>"
6380   [(match_operand:<VWIDE> 0 "register_operand")
6381    (match_operand:VQ_HSI 1 "register_operand")
6382    (match_operand:VQ_HSI 2 "register_operand")]
6383   "TARGET_SIMD"
6385   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6386   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
6387                                                   operands[2], p));
6388   DONE;
6391 ;; vqdmull2_lane
;; SQDMULL2 by lane: high half of operand 1 multiplied by a duplicated lane
;; of a 64-bit vector (<VCOND>); lane number is endian-adjusted in the
;; output block.
6393 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6394   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6395         (ss_ashift:<VWIDE>
6396              (mult:<VWIDE>
6397                (sign_extend:<VWIDE>
6398                  (vec_select:<VHALF>
6399                    (match_operand:VQ_HSI 1 "register_operand" "w")
6400                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6401                (vec_duplicate:<VWIDE>
6402                  (sign_extend:<VWIDE_S>
6403                    (vec_select:<VEL>
6404                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6405                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6406                ))
6407              (const_int 1)))]
6408   "TARGET_SIMD"
6409   {
6410     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6411     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6412   }
6413   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; As above, but the lane comes from a 128-bit vector (<VCONQ>).
6416 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6417   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6418         (ss_ashift:<VWIDE>
6419              (mult:<VWIDE>
6420                (sign_extend:<VWIDE>
6421                  (vec_select:<VHALF>
6422                    (match_operand:VQ_HSI 1 "register_operand" "w")
6423                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6424                (vec_duplicate:<VWIDE>
6425                  (sign_extend:<VWIDE_S>
6426                    (vec_select:<VEL>
6427                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6428                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6429                ))
6430              (const_int 1)))]
6431   "TARGET_SIMD"
6432   {
6433     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6434     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6435   }
6436   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expanders for the lane/laneq internals above: build the hi-half parallel.
6439 (define_expand "aarch64_sqdmull2_lane<mode>"
6440   [(match_operand:<VWIDE> 0 "register_operand")
6441    (match_operand:VQ_HSI 1 "register_operand")
6442    (match_operand:<VCOND> 2 "register_operand")
6443    (match_operand:SI 3 "immediate_operand")]
6444   "TARGET_SIMD"
6446   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6447   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6448                                                        operands[2], operands[3],
6449                                                        p));
6450   DONE;
6453 (define_expand "aarch64_sqdmull2_laneq<mode>"
6454   [(match_operand:<VWIDE> 0 "register_operand")
6455    (match_operand:VQ_HSI 1 "register_operand")
6456    (match_operand:<VCONQ> 2 "register_operand")
6457    (match_operand:SI 3 "immediate_operand")]
6458   "TARGET_SIMD"
6460   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6461   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6462                                                        operands[2], operands[3],
6463                                                        p));
6464   DONE;
6467 ;; vqdmull2_n
;; SQDMULL2 by scalar element: high half of operand 1 multiplied by the
;; duplicated scalar operand 2 (assembly references lane [0]).
6469 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6470   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6471         (ss_ashift:<VWIDE>
6472              (mult:<VWIDE>
6473                (sign_extend:<VWIDE>
6474                  (vec_select:<VHALF>
6475                    (match_operand:VQ_HSI 1 "register_operand" "w")
6476                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6477                (vec_duplicate:<VWIDE>
6478                  (sign_extend:<VWIDE_S>
6479                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6480                )
6481              (const_int 1)))]
6482   "TARGET_SIMD"
6483   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6484   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander: supply the hi-half parallel for the internal pattern above.
6487 (define_expand "aarch64_sqdmull2_n<mode>"
6488   [(match_operand:<VWIDE> 0 "register_operand")
6489    (match_operand:VQ_HSI 1 "register_operand")
6490    (match_operand:<VEL> 2 "register_operand")]
6491   "TARGET_SIMD"
6493   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6494   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
6495                                                     operands[2], p));
6496   DONE;
6499 ;; vshl
;; Register-controlled shifts (signed/unsigned, optionally rounding).  The
;; per-element shift amount in operand 2 may be negative at runtime, so
;; these remain unspecs rather than generic (ashift ...) RTL.
;; NOTE: removed a stray ";" that followed the output-template string; the
;; md reader treated it as an empty comment, so this is purely a cleanup.
6501 (define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
6502   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6503         (unspec:VSDQ_I_DI
6504           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6505            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6506          VSHL))]
6507   "TARGET_SIMD"
6508   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6509   [(set_attr "type" "neon_shift_reg<q>")]
6513 ;; vqshl
;; Saturating (optionally rounding) register-controlled shifts.
6515 (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
6516   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6517         (unspec:VSDQ_I
6518           [(match_operand:VSDQ_I 1 "register_operand" "w")
6519            (match_operand:VSDQ_I 2 "register_operand" "w")]
6520          VQSHL))]
6521   "TARGET_SIMD"
6522   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6523   [(set_attr "type" "neon_sat_shift_reg<q>")]
6526 ;; vshll_n
;; SHLL/SSHLL/USHLL: widen each element then left-shift by an immediate
;; held as a constant vector in operand 2.  The D2 alternative (shift equal
;; to the element width) selects the SHLL mnemonic; DL covers the other
;; legal immediates.
6528 (define_insn "aarch64_<su>shll<mode>"
6529   [(set (match_operand:<VWIDE> 0 "register_operand")
6530         (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6531                             (match_operand:VD_BHSI 1 "register_operand"))
6532                          (match_operand:<VWIDE> 2
6533                            "aarch64_simd_shll_imm_vec")))]
6534   "TARGET_SIMD"
6535   {@ [cons: =0, 1, 2]
6536      [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6537      [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6538   }
6539   [(set_attr "type" "neon_shift_imm_long")]
;; Expander mapping the intrinsic's scalar shift count to the constant
;; vector form required by the insn above.
6542 (define_expand "aarch64_<sur>shll_n<mode>"
6543   [(set (match_operand:<VWIDE> 0 "register_operand")
6544         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
6545                          (match_operand:SI 2
6546                            "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6547                          VSHLL))]
6548   "TARGET_SIMD"
6549   {
6550     rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6551     emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
6552     DONE;
6553   }
6556 ;; vshll_high_n
;; SHLL2/SSHLL2/USHLL2: as aarch64_<su>shll but operating on the high half
;; of a 128-bit input, selected by the hi-half parallel in operand 2.
6558 (define_insn "aarch64_<su>shll2<mode>"
6559   [(set (match_operand:<VWIDE> 0 "register_operand")
6560         (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6561                           (vec_select:<VHALF>
6562                             (match_operand:VQW 1 "register_operand")
6563                             (match_operand:VQW 2 "vect_par_cnst_hi_half")))
6564                          (match_operand:<VWIDE> 3
6565                            "aarch64_simd_shll_imm_vec")))]
6566   "TARGET_SIMD"
6567   {@ [cons: =0, 1, 2, 3]
6568      [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6569      [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6570   }
6571   [(set_attr "type" "neon_shift_imm_long")]
;; Expander: duplicate the scalar shift count into a constant vector and
;; build the hi-half selector for the insn above.
6574 (define_expand "aarch64_<sur>shll2_n<mode>"
6575   [(set (match_operand:<VWIDE> 0 "register_operand")
6576         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
6577                          (match_operand:SI 2
6578                            "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6579                          VSHLL))]
6580   "TARGET_SIMD"
6581   {
6582     rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6583     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6584     emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
6585     DONE;
6586   }
6589 ;; vrshr_n
;; SRSHR/URSHR: rounding shift right by immediate, modelled as
;; truncate ((widened x + (1 << (shift - 1))) >> shift).  Operand 3 is the
;; rounding-constant (vector), validated against the shift amount by
;; aarch64_const_vec_rnd_cst_p.
6591 (define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
6592   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6593         (truncate:VSDQ_I_DI
6594           (SHIFTRT:<V2XWIDE>
6595             (plus:<V2XWIDE>
6596               (<SHIFTEXTEND>:<V2XWIDE>
6597                 (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
6598               (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6599             (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
6600   "TARGET_SIMD
6601    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6602   "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6603   [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Expander: materialise the rounding constant 1 << (shift - 1) and, for
;; vector modes, broadcast both shift and rounding constant.
6606 (define_expand "aarch64_<sra_op>rshr_n<mode>"
6607   [(match_operand:VSDQ_I_DI 0 "register_operand")
6608    (SHIFTRT:VSDQ_I_DI
6609      (match_operand:VSDQ_I_DI 1 "register_operand")
6610      (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
6611   "TARGET_SIMD"
6612   {
6613     /* Use this expander to create the rounding constant vector, which is
6614        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6615        RTL is generated when handling the DImode expanders.  */
6616     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6617     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6618     rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
6619     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6620     if (VECTOR_MODE_P (<MODE>mode))
6621       {
6622         shft = gen_const_vec_duplicate (<MODE>mode, shft);
6623         rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6624       }
6626     emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
6627                                                       shft, rnd));
6628     DONE;
6629   }
6632 ;; v(r)sra_n
;; (Rounding) shift-right-and-accumulate for DImode; the accumulator is
;; tied to the destination ("0" constraint on operand 1).
6634 (define_insn "aarch64_<sur>sra_ndi"
6635   [(set (match_operand:DI 0 "register_operand" "=w")
6636        (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6637                       (match_operand:DI 2 "register_operand" "w")
6638                        (match_operand:SI 3
6639                         "aarch64_simd_shift_imm_offset_di" "i")]
6640                       VSRA))]
6641   "TARGET_SIMD"
6642   "<sur>sra\\t%d0, %d2, %3"
6643   [(set_attr "type" "neon_shift_acc")]
6646 ;; vs<lr>i_n
;; SLI/SRI: shift-and-insert, keeping the bits of the tied destination not
;; covered by the shifted source.
6648 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6649   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6650         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6651                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6652                        (match_operand:SI 3
6653                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6654                       VSLRI))]
6655   "TARGET_SIMD"
6656   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6657   [(set_attr "type" "neon_shift_imm<q>")]
6660 ;; vqshl(u)
;; SQSHL/UQSHL/SQSHLU: saturating shift left by immediate.
6662 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6663   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6664         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6665                        (match_operand:SI 2
6666                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
6667                       VQSHL_N))]
6668   "TARGET_SIMD"
6669   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6670   [(set_attr "type" "neon_sat_shift_imm<q>")]
6674 ;; vq(r)shr(u)n_n
;; Scalar saturating shift-right-narrow: shift the scalar input right and
;; saturate-truncate to the narrow mode.
6676 (define_insn "aarch64_<shrn_op>shrn_n<mode>"
6677   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6678         (SAT_TRUNC:<VNARROWQ>
6679           (<TRUNC_SHIFT>:SD_HSDI
6680             (match_operand:SD_HSDI 1 "register_operand" "w")
6681             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6682   "TARGET_SIMD"
6683   "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6684   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Vector shift-right-narrow; AARCH64_VALID_SHRN_OP restricts the pattern
;; to the truncation/shift combinations that map onto real instructions.
6687 (define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
6688   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6689         (ALL_TRUNC:<VNARROWQ>
6690           (SHIFTRT:VQN
6691             (match_operand:VQN 1 "register_operand" "w")
6692             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6693   "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6694   "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6695   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander: broadcast the scalar shift count into the constant vector the
;; insn pattern expects.
6698 (define_expand "aarch64_<shrn_op>shrn_n<mode>"
6699   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6700         (ALL_TRUNC:<VNARROWQ>
6701           (<TRUNC_SHIFT>:VQN
6702             (match_operand:VQN 1 "register_operand")
6703             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6704   "TARGET_SIMD"
6705   {
6706     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6707                                                  INTVAL (operands[2]));
6708   }
;; Vector rounding shift-right-narrow: add 1 << (shift - 1) in the
;; double-width mode before shifting, then (saturate-)truncate.
6711 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
6712   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6713         (ALL_TRUNC:<VNARROWQ>
6714           (<TRUNC_SHIFT>:<V2XWIDE>
6715             (plus:<V2XWIDE>
6716               (<TRUNCEXTEND>:<V2XWIDE>
6717                 (match_operand:VQN 1 "register_operand" "w"))
6718               (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6719             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6720   "TARGET_SIMD
6721    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6722   "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6723   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar counterpart of the above, using the double-wide integer mode.
6726 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
6727   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6728         (SAT_TRUNC:<VNARROWQ>
6729           (<TRUNC_SHIFT>:<DWI>
6730             (plus:<DWI>
6731               (<TRUNCEXTEND>:<DWI>
6732                 (match_operand:SD_HSDI 1 "register_operand" "w"))
6733               (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6734             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6735   "TARGET_SIMD
6736    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6737   "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6738   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar expander: compute the rounding constant for operand 3.
6741 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6742   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6743         (SAT_TRUNC:<VNARROWQ>
6744           (<TRUNC_SHIFT>:<V2XWIDE>
6745             (plus:<V2XWIDE>
6746               (<TRUNCEXTEND>:<V2XWIDE>
6747                 (match_operand:SD_HSDI 1 "register_operand"))
6748               (match_dup 3))
6749             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6750   "TARGET_SIMD"
6751   {
6752     /* Use this expander to create the rounding constant vector, which is
6753        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6754        RTL is generated when handling the DImode expanders.  */
6755     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6756     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6757     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6758   }
;; Vector expander: RSHRN by the element width of the narrow mode is
;; equivalent to RADDHN with a zero operand, which avoids materialising
;; the shift; otherwise build the broadcast rounding/shift constants.
6761 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6762   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6763         (ALL_TRUNC:<VNARROWQ>
6764           (<TRUNC_SHIFT>:<V2XWIDE>
6765             (plus:<V2XWIDE>
6766               (<TRUNCEXTEND>:<V2XWIDE>
6767                 (match_operand:VQN 1 "register_operand"))
6768               (match_dup 3))
6769             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6770   "TARGET_SIMD"
6771   {
6772     if (<CODE> == TRUNCATE
6773         && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6774       {
6775         rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
6776         emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
6777         DONE;
6778       }
6779     /* Use this expander to create the rounding constant vector, which is
6780        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6781        RTL is generated when handling the DImode expanders.  */
6782     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6783     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6784     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6785     operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6786     operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6787   }
;; SQSHRUN (vector): signed shift right, then clamp to the unsigned range
;; of the narrow element (smax with 0, smin with the narrow-mode max)
;; before truncating.
6790 (define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
6791   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6792         (truncate:<VNARROWQ>
6793           (smin:VQN
6794             (smax:VQN
6795               (ashiftrt:VQN
6796                 (match_operand:VQN 1 "register_operand" "w")
6797                 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6798               (match_operand:VQN 3 "aarch64_simd_imm_zero"))
6799             (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
6800   "TARGET_SIMD"
6801   "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6802   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; SQSHRUN (scalar): same clamp expressed on the full scalar mode; the
;; result is written in the wide mode and narrowed by the expander below.
6805 (define_insn "aarch64_sqshrun_n<mode>_insn"
6806   [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
6807         (smin:SD_HSDI
6808           (smax:SD_HSDI
6809             (ashiftrt:SD_HSDI
6810               (match_operand:SD_HSDI 1 "register_operand" "w")
6811               (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6812             (const_int 0))
6813           (const_int <half_mask>)))]
6814   "TARGET_SIMD"
6815   "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6816   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar expander: emit the insn above into a wide temporary, then take
;; the narrow lowpart.
6819 (define_expand "aarch64_sqshrun_n<mode>"
6820   [(match_operand:<VNARROWQ> 0 "register_operand")
6821    (match_operand:SD_HSDI 1 "register_operand")
6822    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6823   "TARGET_SIMD"
6824   {
6825     rtx dst = gen_reg_rtx (<MODE>mode);
6826     emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
6827                                                  operands[2]));
6828     emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6829     DONE;
6830   }
;; Vector expander: broadcast the shift count and build the zero / narrow
;; unsigned-max clamp constants for the vector insn.
6833 (define_expand "aarch64_sqshrun_n<mode>"
6834   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6835         (truncate:<VNARROWQ>
6836           (smin:VQN
6837             (smax:VQN
6838               (ashiftrt:VQN
6839                 (match_operand:VQN 1 "register_operand")
6840                 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6841               (match_dup 3))
6842             (match_dup 4))))]
6843   "TARGET_SIMD"
6844   {
6845     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6846                                                  INTVAL (operands[2]));
6847     operands[3] = CONST0_RTX (<MODE>mode);
6848     operands[4]
6849       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6850                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6851   }
;; SQRSHRUN (vector): rounding variant of SQSHRUN — add 1 << (shift - 1)
;; in the double-wide mode, arithmetic shift right, clamp to [0, narrow
;; unsigned max], truncate.
6854 (define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
6855   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6856         (truncate:<VNARROWQ>
6857           (smin:<V2XWIDE>
6858             (smax:<V2XWIDE>
6859               (ashiftrt:<V2XWIDE>
6860                 (plus:<V2XWIDE>
6861                   (sign_extend:<V2XWIDE>
6862                     (match_operand:VQN 1 "register_operand" "w"))
6863                   (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6864                 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6865               (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
6866             (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
6867   "TARGET_SIMD
6868    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6869   "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6870   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; SQRSHRUN (scalar): computed in the double-wide integer mode; the
;; expander below extracts the narrow lowpart.
6873 (define_insn "aarch64_sqrshrun_n<mode>_insn"
6874   [(set (match_operand:<DWI> 0 "register_operand" "=w")
6875         (smin:<DWI>
6876           (smax:<DWI>
6877             (ashiftrt:<DWI>
6878               (plus:<DWI>
6879                 (sign_extend:<DWI>
6880                   (match_operand:SD_HSDI 1 "register_operand" "w"))
6881                 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6882               (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6883             (const_int 0))
6884           (const_int <half_mask>)))]
6885   "TARGET_SIMD
6886    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6887   "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6888   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Scalar expander: build the rounding constant, run the insn into a
;; double-wide temporary, then move the narrow lowpart to the result.
6891 (define_expand "aarch64_sqrshrun_n<mode>"
6892   [(match_operand:<VNARROWQ> 0 "register_operand")
6893    (match_operand:SD_HSDI 1 "register_operand")
6894    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6895   "TARGET_SIMD"
6896   {
6897     int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
6898     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6899     rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
6900     rtx dst = gen_reg_rtx (<DWI>mode);
6901     emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
6902     emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6903     DONE;
6904   }
;; Vector expander: materialise the broadcast rounding constant, shift
;; vector, zero and narrow-unsigned-max clamp vectors for the insn above.
6907 (define_expand "aarch64_sqrshrun_n<mode>"
6908   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6909         (truncate:<VNARROWQ>
6910           (smin:<V2XWIDE>
6911             (smax:<V2XWIDE>
6912               (ashiftrt:<V2XWIDE>
6913                 (plus:<V2XWIDE>
6914                   (sign_extend:<V2XWIDE>
6915                     (match_operand:VQN 1 "register_operand"))
6916                   (match_dup 3))
6917                 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6918               (match_dup 4))
6919             (match_dup 5))))]
6920   "TARGET_SIMD"
6921   {
6922     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6923     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6924     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6925     operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6926     operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6927     operands[4] = CONST0_RTX (<V2XWIDE>mode);
6928     operands[5]
6929       = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
6930     operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
6931   }
;; SHRN2-family (narrow into the high half): the narrowed shift result is
;; concatenated with the existing low half (operand 1, tied to the
;; destination).  The vec_concat operand order depends on endianness, so
;; there are separate _le and _be patterns.
6934 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
6935   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6936         (vec_concat:<VNARROWQ2>
6937           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6938           (ALL_TRUNC:<VNARROWQ>
6939             (SHIFTRT:VQN
6940               (match_operand:VQN 2 "register_operand" "w")
6941               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6942   "TARGET_SIMD && !BYTES_BIG_ENDIAN
6943    && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6944   "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6945   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian variant: vec_concat operands swapped.
6948 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
6949   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6950         (vec_concat:<VNARROWQ2>
6951           (ALL_TRUNC:<VNARROWQ>
6952             (SHIFTRT:VQN
6953               (match_operand:VQN 2 "register_operand" "w")
6954               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6955           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6956   "TARGET_SIMD && BYTES_BIG_ENDIAN
6957    && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6958   "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6959   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander: broadcast the shift immediate and pick the _le or _be insn
;; according to the target endianness.
6962 (define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
6963   [(match_operand:<VNARROWQ2> 0 "register_operand")
6964    (match_operand:<VNARROWQ> 1 "register_operand")
6965    (ALL_TRUNC:<VNARROWQ>
6966      (SHIFTRT:VQN (match_operand:VQN 2 "register_operand")))
6967    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6968   "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6969   {
6970     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6971                                                  INTVAL (operands[3]));
6973     if (BYTES_BIG_ENDIAN)
6974       emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
6975                 operands[0], operands[1], operands[2], operands[3]));
6976     else
6977       emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
6978                 operands[0], operands[1], operands[2], operands[3]));
6979     DONE;
6980   }
;; Rounding narrowing right shift into the high half ((SQ|UQ|)RSHRN2-style).
;; Rounding is modelled in the double-width mode <V2XWIDE> as an add of
;; 1 << (shift - 1) (operand 4) before the shift.  Little-endian form.
6983 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
6984   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6985         (vec_concat:<VNARROWQ2>
6986           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6987           (ALL_TRUNC:<VNARROWQ>
6988             (<TRUNC_SHIFT>:<V2XWIDE>
6989               (plus:<V2XWIDE>
6990                 (<TRUNCEXTEND>:<V2XWIDE>
6991                   (match_operand:VQN 2 "register_operand" "w"))
6992                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6993               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6994   "TARGET_SIMD && !BYTES_BIG_ENDIAN
6995    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6996   "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6997   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian form of the above with the vec_concat operands swapped.
7000 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
7001   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7002         (vec_concat:<VNARROWQ2>
7003           (ALL_TRUNC:<VNARROWQ>
7004             (<TRUNC_SHIFT>:<V2XWIDE>
7005               (plus:<V2XWIDE>
7006                 (<TRUNCEXTEND>:<V2XWIDE>
7007                   (match_operand:VQN 2 "register_operand" "w"))
7008                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7009               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
7010           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7011   "TARGET_SIMD && BYTES_BIG_ENDIAN
7012    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7013   "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7014   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander: special-case a truncating shift by the full element width as
;; RADDHN2 with a zero addend, otherwise build the rounding constant and
;; dispatch to the endian-specific insn.
7017 (define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
7018   [(match_operand:<VNARROWQ2> 0 "register_operand")
7019    (match_operand:<VNARROWQ> 1 "register_operand")
7020    (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
7021    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7022   "TARGET_SIMD"
7023   {
7024     if (<CODE> == TRUNCATE
7025         && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
7026       {
7027         rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
7028         emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
7029                                               operands[2], tmp));
7030         DONE;
7031       }
7032     /* Use this expander to create the rounding constant vector, which is
7033        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
7034        RTL is generated when handling the DImode expanders.  */
7035     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7036     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7037     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7038     rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7039     operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7040     if (BYTES_BIG_ENDIAN)
7041       emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
7042                                                               operands[1],
7043                                                               operands[2],
7044                                                               operands[3],
7045                                                               rnd));
7046     else
7047       emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
7048                                                               operands[1],
7049                                                               operands[2],
7050                                                               operands[3],
7051                                                               rnd));
7052     DONE;
7053   }
;; SQSHRUN2: signed arithmetic right shift, then clamp to the unsigned
;; narrow range, modelled as smin (smax (ashiftrt x n) 0) umax_narrow.
;; Little-endian form (preserved half first in the vec_concat).
7056 (define_insn "aarch64_sqshrun2_n<mode>_insn_le"
7057   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7058         (vec_concat:<VNARROWQ2>
7059           (match_operand:<VNARROWQ> 1 "register_operand" "0")
7060           (truncate:<VNARROWQ>
7061             (smin:VQN
7062               (smax:VQN
7063                 (ashiftrt:VQN
7064                   (match_operand:VQN 2 "register_operand" "w")
7065                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7066                 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
7067               (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
7068   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
7069   "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7070   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian form of the above with the vec_concat operands swapped.
7073 (define_insn "aarch64_sqshrun2_n<mode>_insn_be"
7074   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7075         (vec_concat:<VNARROWQ2>
7076           (truncate:<VNARROWQ>
7077             (smin:VQN
7078               (smax:VQN
7079                 (ashiftrt:VQN
7080                   (match_operand:VQN 2 "register_operand" "w")
7081                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7082                 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
7083               (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
7084           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7085   "TARGET_SIMD && BYTES_BIG_ENDIAN"
7086   "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7087   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander: materialise the shift-amount vector, the zero clamp and the
;; narrow-element unsigned maximum, then pick the endian-specific insn.
7090 (define_expand "aarch64_sqshrun2_n<mode>"
7091   [(match_operand:<VNARROWQ2> 0 "register_operand")
7092    (match_operand:<VNARROWQ> 1 "register_operand")
7093    (match_operand:VQN 2 "register_operand")
7094    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7095   "TARGET_SIMD"
7096   {
7097     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
7098                                                  INTVAL (operands[3]));
7099     rtx zeros = CONST0_RTX (<MODE>mode);
7100     rtx half_umax
7101       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
7102                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7103     if (BYTES_BIG_ENDIAN)
7104       emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
7105                                 operands[1], operands[2], operands[3],
7106                                 zeros, half_umax));
7107     else
7108       emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
7109                                 operands[1], operands[2], operands[3],
7110                                 zeros, half_umax));
7111     DONE;
7112   }
;; SQRSHRUN2: rounding variant of SQSHRUN2.  The rounding add of
;; 1 << (shift - 1) (operand 4) and the clamping are done in the
;; double-width mode <V2XWIDE>.  Little-endian form.
7115 (define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
7116   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7117         (vec_concat:<VNARROWQ2>
7118           (match_operand:<VNARROWQ> 1 "register_operand" "0")
7119           (truncate:<VNARROWQ>
7120             (smin:<V2XWIDE>
7121               (smax:<V2XWIDE>
7122                 (ashiftrt:<V2XWIDE>
7123                   (plus:<V2XWIDE>
7124                     (sign_extend:<V2XWIDE>
7125                       (match_operand:VQN 2 "register_operand" "w"))
7126                     (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7127                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7128                 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
7129               (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
7130   "TARGET_SIMD && !BYTES_BIG_ENDIAN
7131    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7132   "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7133   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Big-endian form of the above with the vec_concat operands swapped.
7136 (define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
7137   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7138         (vec_concat:<VNARROWQ2>
7139           (truncate:<VNARROWQ>
7140             (smin:<V2XWIDE>
7141               (smax:<V2XWIDE>
7142                 (ashiftrt:<V2XWIDE>
7143                   (plus:<V2XWIDE>
7144                     (sign_extend:<V2XWIDE>
7145                       (match_operand:VQN 2 "register_operand" "w"))
7146                     (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7147                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7148                 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
7149               (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
7150           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7151   "TARGET_SIMD && BYTES_BIG_ENDIAN
7152    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7153   "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7154   [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Expander: build the rounding constant (via wide_int so TImode constants
;; are formed correctly), the zero clamp and the unsigned maximum, then
;; dispatch to the endian-specific insn.
7157 (define_expand "aarch64_sqrshrun2_n<mode>"
7158   [(match_operand:<VNARROWQ2> 0 "register_operand")
7159    (match_operand:<VNARROWQ> 1 "register_operand")
7160    (match_operand:VQN 2 "register_operand")
7161    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7162   "TARGET_SIMD"
7163   {
7164     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7165     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7166     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7167     rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7168     rtx zero = CONST0_RTX (<V2XWIDE>mode);
7169     rtx half_umax
7170       = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
7171                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7172     operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7173     if (BYTES_BIG_ENDIAN)
7174       emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
7175                                 operands[1], operands[2], operands[3], rnd,
7176                                 zero, half_umax));
7177     else
7178       emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
7179                                 operands[1], operands[2], operands[3], rnd,
7180                                 zero, half_umax));
7181     DONE;
7182   }
7185 ;; cm(eq|ge|gt|lt|le)
7186 ;; Note, we have constraints for Dz and Z as different expanders
7187 ;; have different ideas of what should be passed to this pattern.
;; Vector integer compare producing an all-ones/all-zeros mask, written as
;; neg of the comparison.  Compact syntax: the first alternative handles
;; register-register compares, the second compares against zero (#0).
7189 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7190   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7191         (neg:<V_INT_EQUIV>
7192           (COMPARISONS:<V_INT_EQUIV>
7193             (match_operand:VDQ_I 1 "register_operand")
7194             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")
7195           )))]
7196   "TARGET_SIMD"
7197   {@ [ cons: =0 , 1 , 2   ; attrs: type           ]
7198      [ w        , w , w   ; neon_compare<q>       ] cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7199      [ w        , w , ZDz ; neon_compare_zero<q>  ] cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0
7200   }
;; DI-mode variant: kept as a single insn until after reload so the
;; register allocator can choose between the FP/SIMD and GP register
;; files; split to a compare-and-store sequence when in GP registers,
;; otherwise to the CC-clobber-free pattern below.
7203 (define_insn_and_split "aarch64_cm<optab>di"
7204   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
7205         (neg:DI
7206           (COMPARISONS:DI
7207             (match_operand:DI 1 "register_operand" "w,w,r")
7208             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
7209           )))
7210      (clobber (reg:CC CC_REGNUM))]
7211   "TARGET_SIMD"
7212   "#"
7213   "&& reload_completed"
7214   [(set (match_operand:DI 0 "register_operand")
7215         (neg:DI
7216           (COMPARISONS:DI
7217             (match_operand:DI 1 "register_operand")
7218             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7219           )))]
7220   {
7221     /* If we are in the general purpose register file,
7222        we split to a sequence of comparison and store.  */
7223     if (GP_REGNUM_P (REGNO (operands[0]))
7224         && GP_REGNUM_P (REGNO (operands[1])))
7225       {
7226         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
7227         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7228         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7229         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7230         DONE;
7231       }
7232     /* Otherwise, we expand to a similar pattern which does not
7233        clobber CC_REGNUM.  */
7234   }
7235   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Post-reload DI-mode compare in the FP/SIMD register file; no CC clobber.
7238 (define_insn "*aarch64_cm<optab>di"
7239   [(set (match_operand:DI 0 "register_operand")
7240         (neg:DI
7241           (COMPARISONS:DI
7242             (match_operand:DI 1 "register_operand")
7243             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7244           )))]
7245   "TARGET_SIMD && reload_completed"
7246   {@ [ cons: =0 , 1 , 2   ; attrs: type        ]
7247      [ w        , w , w   ; neon_compare       ] cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
7248      [ w        , w , ZDz ; neon_compare_zero  ] cm<optab>\t%d0, %d1, #0
7249   }
7252 ;; cm(hs|hi)
;; Unsigned vector compare (CMHS/CMHI); register-register only, since
;; unsigned compare against zero is degenerate.
7254 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7255   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7256         (neg:<V_INT_EQUIV>
7257           (UCOMPARISONS:<V_INT_EQUIV>
7258             (match_operand:VDQ_I 1 "register_operand" "w")
7259             (match_operand:VDQ_I 2 "register_operand" "w")
7260           )))]
7261   "TARGET_SIMD"
7262   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7263   [(set_attr "type" "neon_compare<q>")]
;; DI-mode unsigned variant; like the signed case, deferred until after
;; reload so the chosen register file decides between a GP
;; compare-and-store sequence and the SIMD pattern below.
7266 (define_insn_and_split "aarch64_cm<optab>di"
7267   [(set (match_operand:DI 0 "register_operand" "=w,r")
7268         (neg:DI
7269           (UCOMPARISONS:DI
7270             (match_operand:DI 1 "register_operand" "w,r")
7271             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
7272           )))
7273     (clobber (reg:CC CC_REGNUM))]
7274   "TARGET_SIMD"
7275   "#"
7276   "&& reload_completed"
7277   [(set (match_operand:DI 0 "register_operand")
7278         (neg:DI
7279           (UCOMPARISONS:DI
7280             (match_operand:DI 1 "register_operand")
7281             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7282           )))]
7283   {
7284     /* If we are in the general purpose register file,
7285        we split to a sequence of comparison and store.  */
7286     if (GP_REGNUM_P (REGNO (operands[0]))
7287         && GP_REGNUM_P (REGNO (operands[1])))
7288       {
7289         machine_mode mode = CCmode;
7290         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7291         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7292         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7293         DONE;
7294       }
7295     /* Otherwise, we expand to a similar pattern which does not
7296        clobber CC_REGNUM.  */
7297   }
7298   [(set_attr "type" "neon_compare,multiple")]
;; Post-reload DI-mode unsigned compare in the FP/SIMD register file.
7301 (define_insn "*aarch64_cm<optab>di"
7302   [(set (match_operand:DI 0 "register_operand" "=w")
7303         (neg:DI
7304           (UCOMPARISONS:DI
7305             (match_operand:DI 1 "register_operand" "w")
7306             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
7307           )))]
7308   "TARGET_SIMD && reload_completed"
7309   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
7310   [(set_attr "type" "neon_compare")]
7313 ;; cmtst
7315 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
7316 ;; we don't have any insns using ne, and aarch64_vcond outputs
7317 ;; not (neg (eq (and x y) 0))
7318 ;; which is rewritten by simplify_rtx as
7319 ;; plus (eq (and x y) 0) -1.
;; CMTST: matches the simplify_rtx canonical form
;; plus (eq (and x y) 0) -1 described in the comment above.
7321 (define_insn "aarch64_cmtst<mode><vczle><vczbe>"
7322   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7323         (plus:<V_INT_EQUIV>
7324           (eq:<V_INT_EQUIV>
7325             (and:VDQ_I
7326               (match_operand:VDQ_I 1 "register_operand" "w")
7327               (match_operand:VDQ_I 2 "register_operand" "w"))
7328             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
7329           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
7330   ]
7331   "TARGET_SIMD"
7332   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7333   [(set_attr "type" "neon_tst<q>")]
7336 ;; One can also get a cmtst by having to combine a
7337 ;; not (neg (eq x 0)) in which case you rewrite it to
7338 ;; a comparison against itself
;; CMTST of an operand against itself: no AND in the matched RTL, so
;; operand 1 is used for both instruction source registers.
7340 (define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
7341   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7342         (plus:<V_INT_EQUIV>
7343           (eq:<V_INT_EQUIV>
7344             (match_operand:VDQ_I 1 "register_operand" "w")
7345             (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
7346           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
7347   ]
7348   "TARGET_SIMD"
7349   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
7350   [(set_attr "type" "neon_tst<q>")]
;; DI-mode CMTST; as with the DI compares above, split after reload:
;; a TST/CSET-style compare-and-store sequence in GP registers, or the
;; CC-clobber-free SIMD pattern below.
7353 (define_insn_and_split "aarch64_cmtstdi"
7354   [(set (match_operand:DI 0 "register_operand" "=w,r")
7355         (neg:DI
7356           (ne:DI
7357             (and:DI
7358               (match_operand:DI 1 "register_operand" "w,r")
7359               (match_operand:DI 2 "register_operand" "w,r"))
7360             (const_int 0))))
7361     (clobber (reg:CC CC_REGNUM))]
7362   "TARGET_SIMD"
7363   "#"
7364   "&& reload_completed"
7365   [(set (match_operand:DI 0 "register_operand")
7366         (neg:DI
7367           (ne:DI
7368             (and:DI
7369               (match_operand:DI 1 "register_operand")
7370               (match_operand:DI 2 "register_operand"))
7371             (const_int 0))))]
7372   {
7373     /* If we are in the general purpose register file,
7374        we split to a sequence of comparison and store.  */
7375     if (GP_REGNUM_P (REGNO (operands[0]))
7376         && GP_REGNUM_P (REGNO (operands[1])))
7377       {
7378         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
7379         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
7380         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
7381         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
7382         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7383         DONE;
7384       }
7385     /* Otherwise, we expand to a similar pattern which does not
7386        clobber CC_REGNUM.  */
7387   }
7388   [(set_attr "type" "neon_tst,multiple")]
;; Post-reload DI-mode CMTST in the FP/SIMD register file.
7391 (define_insn "*aarch64_cmtstdi<vczle><vczbe>"
7392   [(set (match_operand:DI 0 "register_operand" "=w")
7393         (neg:DI
7394           (ne:DI
7395             (and:DI
7396               (match_operand:DI 1 "register_operand" "w")
7397               (match_operand:DI 2 "register_operand" "w"))
7398             (const_int 0))))]
7399   "TARGET_SIMD"
7400   "cmtst\t%d0, %d1, %d2"
7401   [(set_attr "type" "neon_tst")]
7404 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point vector compare (FCMEQ/FCMGE/FCMGT/...); second
;; alternative compares against zero (YDz constraint).
7406 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7407   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7408         (neg:<V_INT_EQUIV>
7409           (COMPARISONS:<V_INT_EQUIV>
7410             (match_operand:VHSDF_HSDF 1 "register_operand")
7411             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero")
7412           )))]
7413   "TARGET_SIMD"
7414   {@ [ cons: =0 , 1 , 2    ]
7415      [ w        , w , w    ] fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7416      [ w        , w , YDz  ] fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0
7417   }
7418   [(set_attr "type" "neon_fp_compare_<stype><q>")]
7421 ;; fac(ge|gt)
7422 ;; Note we can also handle what would be fac(le|lt) by
7423 ;; generating fac(ge|gt).
;; FACGE/FACGT: compare of absolute values, modelled as abs on both
;; operands inside the comparison.
7425 (define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
7426   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7427         (neg:<V_INT_EQUIV>
7428           (FAC_COMPARISONS:<V_INT_EQUIV>
7429             (abs:VHSDF_HSDF
7430               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
7431             (abs:VHSDF_HSDF
7432               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
7433   )))]
7434   "TARGET_SIMD"
7435   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7436   [(set_attr "type" "neon_fp_compare_<stype><q>")]
7439 ;; addp
7441 ;; ADDP with two registers semantically concatenates them and performs
7442 ;; a pairwise addition on the result.  For 128-bit input modes represent this
7443 ;; as a concatenation of the pairwise addition results of the two input
7444 ;; registers.  This allows us to avoid using intermediate 256-bit modes.
;; 128-bit ADDP: each half of the result is the sum of the even- and
;; odd-indexed elements (operands 3/4, checked to differ in the condition)
;; selected from one input register.
7445 (define_insn "aarch64_addp<mode>_insn"
7446   [(set (match_operand:VQ_I 0 "register_operand" "=w")
7447         (vec_concat:VQ_I
7448           (plus:<VHALF>
7449             (vec_select:<VHALF>
7450               (match_operand:VQ_I 1 "register_operand" "w")
7451               (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
7452             (vec_select:<VHALF>
7453               (match_dup 1)
7454               (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
7455           (plus:<VHALF>
7456             (vec_select:<VHALF>
7457               (match_operand:VQ_I 2 "register_operand" "w")
7458               (match_dup 3))
7459             (vec_select:<VHALF>
7460               (match_dup 2)
7461               (match_dup 4)))))]
7462   "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7463   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7464   [(set_attr "type" "neon_reduc_add<q>")]
7467 ;; For 64-bit input modes an ADDP is represented as a concatenation
7468 ;; of the input registers into a 128-bit register which is then fed
7469 ;; into a pairwise add.  That way we avoid having to create intermediate
7470 ;; 32-bit vector modes.
;; 64-bit ADDP: the two 64-bit inputs are concatenated into <VDBL> and the
;; even/odd element selections are added pairwise.
7471 (define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
7472   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
7473         (plus:VD_BHSI
7474           (vec_select:VD_BHSI
7475             (vec_concat:<VDBL>
7476               (match_operand:VD_BHSI 1 "register_operand" "w")
7477               (match_operand:VD_BHSI 2 "register_operand" "w"))
7478             (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
7479           (vec_select:VD_BHSI
7480             (vec_concat:<VDBL>
7481               (match_dup 1)
7482               (match_dup 2))
7483             (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
7484   "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7485   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7486   [(set_attr "type" "neon_reduc_add<q>")]
7489 ;; A common use case of 64-bit ADDP is to have both operands come from the same
7490 ;; 128-bit vector and produce the pairwise addition results in the lower half.
7491 ;; Split into the 128-bit ADDP form and extract the low half.
;; Pairwise add where both selections come from the same 128-bit register:
;; split into the 128-bit ADDP insn into a scratch (a fresh pseudo when
;; possible, otherwise a subreg of the destination) and take the low half.
7492 (define_insn_and_split "*aarch64_addp_same_reg<mode>"
7493   [(set (match_operand:<VHALF> 0 "register_operand" "=w")
7494         (plus:<VHALF>
7495           (vec_select:<VHALF>
7496             (match_operand:VQ_I 1 "register_operand" "w")
7497             (match_operand:VQ_I 2 "vect_par_cnst_even_or_odd_half"))
7498           (vec_select:<VHALF>
7499             (match_dup 1)
7500             (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))))]
7501   "TARGET_SIMD && !rtx_equal_p (operands[2], operands[3])"
7502   "#"
7503   "&& 1"
7504   [(const_int 0)]
7505   {
7506     rtx scratch;
7507     if (can_create_pseudo_p ())
7508       scratch = gen_reg_rtx (<MODE>mode);
7509     else
7510       scratch = lowpart_subreg (<MODE>mode, operands[0], <VHALF>mode);
7512     emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[1], operands[1],
7513                                             operands[2], operands[3]));
7514     emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, scratch));
7515     DONE;
7516   }
;; ADDP expander: build the even- and odd-index PARALLELs (half length for
;; 128-bit modes, since each half selects from one register) and emit the
;; mode-appropriate insn.
7519 (define_expand "aarch64_addp<mode>"
7520   [(match_operand:VDQ_I 0 "register_operand")
7521    (match_operand:VDQ_I 1 "register_operand")
7522    (match_operand:VDQ_I 2 "register_operand")]
7523   "TARGET_SIMD"
7524   {
7525     int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
7526     if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
7527       nunits /= 2;
7528     rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
7529     rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
7530     emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
7531                                             operands[2], par_even, par_odd));
7532     DONE;
7533   }
7536 ;; sqrt
;; Vector square root: the expander first tries the approximate-sqrt
;; sequence (subject to aarch64_emit_approx_sqrt's own checks); otherwise
;; the FSQRT insn below matches.
7538 (define_expand "sqrt<mode>2"
7539   [(set (match_operand:VHSDF 0 "register_operand")
7540         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
7541   "TARGET_SIMD"
7543   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
7544     DONE;
7547 (define_insn "*sqrt<mode>2<vczle><vczbe>"
7548   [(set (match_operand:VHSDF 0 "register_operand" "=w")
7549         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
7550   "TARGET_SIMD"
7551   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
7552   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
7555 ;; Patterns for vector struct loads and stores.
;; LD2: two-register de-interleaving structure load.
7557 (define_insn "aarch64_simd_ld2<vstruct_elt>"
7558   [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
7559         (unspec:VSTRUCT_2Q [
7560           (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
7561           UNSPEC_LD2))]
7562   "TARGET_SIMD"
7563   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7564   [(set_attr "type" "neon_load2_2reg<q>")]
;; LD2R: load one structure and replicate to all lanes of both registers.
7567 (define_insn "@aarch64_simd_ld2r<vstruct_elt>"
7568   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7569         (unspec:VSTRUCT_2QD [
7570           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7571           UNSPEC_LD2_DUP))]
7572   "TARGET_SIMD"
7573   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7574   [(set_attr "type" "neon_load2_all_lanes<q>")]
;; LD2 to a single lane; operand 2 provides the untouched lanes and the
;; lane index (operand 3) is converted to the architectural (endian-
;; adjusted) numbering before printing.
7577 (define_insn "@aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7578   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7579         (unspec:VSTRUCT_2QD [
7580                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7581                 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
7582                 (match_operand:SI 3 "immediate_operand" "i")]
7583                 UNSPEC_LD2_LANE))]
7584   "TARGET_SIMD"
7585   {
7586     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7587                                            INTVAL (operands[3]));
7588     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
7589   }
7590   [(set_attr "type" "neon_load2_one_lane")]
;; vec_load_lanes expander (2 registers): on big-endian, load into a
;; temporary and reverse the elements of each register so the RTL-level
;; lane numbering matches GCC's vector-extension ordering.
7593 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7594   [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
7595         (unspec:VSTRUCT_2Q [
7596                 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
7597                 UNSPEC_LD2))]
7598   "TARGET_SIMD"
7600   if (BYTES_BIG_ENDIAN)
7601     {
7602       rtx tmp = gen_reg_rtx (<MODE>mode);
7603       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7604                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7605       emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
7606       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7607     }
7608   else
7609     emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
7610   DONE;
;; ST2: two-register interleaving structure store.
7613 (define_insn "aarch64_simd_st2<vstruct_elt>"
7614   [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
7615         (unspec:VSTRUCT_2Q [
7616                 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
7617                 UNSPEC_ST2))]
7618   "TARGET_SIMD"
7619   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7620   [(set_attr "type" "neon_store2_2reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
7624 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7625   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7626         (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
7627                      (match_operand:SI 2 "immediate_operand" "i")]
7628                      UNSPEC_ST2_LANE))]
7629   "TARGET_SIMD"
7630   {
7631     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7632                                            INTVAL (operands[2]));
7633     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
7634   }
7635   [(set_attr "type" "neon_store2_one_lane<q>")]
;; vec_store_lanes expander (2 registers): mirror of the load expander —
;; on big-endian, reverse each register's elements into a temporary first.
7638 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7639   [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
7640         (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
7641                    UNSPEC_ST2))]
7642   "TARGET_SIMD"
7644   if (BYTES_BIG_ENDIAN)
7645     {
7646       rtx tmp = gen_reg_rtx (<MODE>mode);
7647       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7648                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7649       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7650       emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
7651     }
7652   else
7653     emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
7654   DONE;
;; LD3: three-register de-interleaving structure load.
7657 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7658   [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
7659         (unspec:VSTRUCT_3Q [
7660           (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
7661           UNSPEC_LD3))]
7662   "TARGET_SIMD"
7663   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7664   [(set_attr "type" "neon_load3_3reg<q>")]
;; LD3R: load one structure and replicate to all lanes of all three
;; registers.
7667 (define_insn "@aarch64_simd_ld3r<vstruct_elt>"
7668   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7669         (unspec:VSTRUCT_3QD [
7670           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7671           UNSPEC_LD3_DUP))]
7672   "TARGET_SIMD"
7673   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7674   [(set_attr "type" "neon_load3_all_lanes<q>")]
;; LD3 to a single lane; lane index is endian-adjusted before printing.
7677 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7678   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7679         (unspec:VSTRUCT_3QD [
7680                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7681                 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
7682                 (match_operand:SI 3 "immediate_operand" "i")]
7683                 UNSPEC_LD3_LANE))]
7684   "TARGET_SIMD"
7686     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7687                                            INTVAL (operands[3]));
7688     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
7690   [(set_attr "type" "neon_load3_one_lane")]
;; vec_load_lanes expander (3 registers): same big-endian element-reversal
;; scheme as the two-register variant above.
7693 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7694   [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
7695         (unspec:VSTRUCT_3Q [
7696                 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
7697                 UNSPEC_LD3))]
7698   "TARGET_SIMD"
7700   if (BYTES_BIG_ENDIAN)
7701     {
7702       rtx tmp = gen_reg_rtx (<MODE>mode);
7703       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7704                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7705       emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
7706       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7707     }
7708   else
7709     emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
7710   DONE;
;; Interleaving store of three Q-register vectors (ST3).
(define_insn "aarch64_simd_st3<vstruct_elt>"
  [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store lane 2 of a three-register list as one 3-element structure (ST3
;; single-structure form); the lane number is converted to the architectural
;; index at output time.
(define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
                     (match_operand:SI 2 "immediate_operand" "i")]
                     UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
                                           INTVAL (operands[2]));
    return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store3_one_lane<q>")]
;; Standard vec_store_lanes pattern for 3-vector tuples.  On big-endian the
;; register list is element-reversed (aarch64_rev_reglist) into a temporary
;; before the ST3 so the stored layout matches RTL lane numbering.
(define_expand "vec_store_lanes<mode><vstruct_elt>"
  [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
        (unspec:VSTRUCT_3Q [
                (match_operand:VSTRUCT_3Q 1 "register_operand")]
                UNSPEC_ST3))]
  "TARGET_SIMD"
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
                        GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
      emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
  DONE;
;; De-interleaving load of four Q-register vectors (LD4).
(define_insn "aarch64_simd_ld4<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
        (unspec:VSTRUCT_4Q [
          (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
;; Load one 4-element structure and replicate it to all lanes of a
;; four-register list (LD4R).
(define_insn "@aarch64_simd_ld4r<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
        (unspec:VSTRUCT_4QD [
          (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
;; Load one 4-element structure into lane 3 of a four-register list; the
;; remaining lanes come from operand 2 (tied to the output).  Lane number is
;; flipped for big-endian at output time.
(define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
        (unspec:VSTRUCT_4QD [
                (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
                (match_operand:SI 3 "immediate_operand" "i")]
                UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
    operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
                                           INTVAL (operands[3]));
    return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
  [(set_attr "type" "neon_load4_one_lane")]
;; Standard vec_load_lanes pattern for 4-vector tuples; big-endian targets
;; reverse the elements of each vector after the load, as for LD3 above.
(define_expand "vec_load_lanes<mode><vstruct_elt>"
  [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
        (unspec:VSTRUCT_4Q [
                (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
                UNSPEC_LD4))]
  "TARGET_SIMD"
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
                        GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
      emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
  DONE;
;; Interleaving store of four Q-register vectors (ST4).
(define_insn "aarch64_simd_st4<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:VSTRUCT_4Q [
                (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
                UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
;; RTL uses GCC vector extension indices, so flip only for assembly.
;; Store lane 2 of a four-register list as one 4-element structure (ST4
;; single-structure form).
(define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
                     (match_operand:SI 2 "immediate_operand" "i")]
                     UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
                                           INTVAL (operands[2]));
    return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store4_one_lane<q>")]
;; Standard vec_store_lanes pattern for 4-vector tuples; big-endian targets
;; element-reverse the register list into a temporary before the ST4.
(define_expand "vec_store_lanes<mode><vstruct_elt>"
  [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
        (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
                        GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
      emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
  DONE;
;; Patterns for rcpc3 vector lane loads and stores.
;; Store-release of a single vector lane (STL1, FEAT_LRCPC3).  The lane
;; index is converted to the architectural number at output time.
(define_insn "aarch64_vec_stl1_lanes<mode>_lane<Vel>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q")
        (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w")
                     (match_operand:SI 2 "immediate_operand" "i")]
                     UNSPEC_STL1_LANE))]
  "TARGET_RCPC3"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode,
                                           INTVAL (operands[2]));
    return "stl1\\t{%S1.<Vetype>}[%2], %0";
  }
  [(set_attr "type" "neon_store2_one_lane")]
;; Intrinsic expander for STL1: build a BLKmode MEM of element size from the
;; base address in operand 0, range-check the lane index, then emit the
;; insn above.
(define_expand "aarch64_vec_stl1_lane<mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:V12DIF 1 "register_operand")
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_RCPC3"
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
  aarch64_simd_lane_bounds (operands[2], 0,
                            GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
  emit_insn (gen_aarch64_vec_stl1_lanes<mode>_lane<Vel> (mem,
                                        operands[1], operands[2]));
  DONE;
;; Load-acquire of a single vector lane (LDAP1, FEAT_LRCPC3).  The remaining
;; lanes come from operand 2, which is tied to the output register.
(define_insn "aarch64_vec_ldap1_lanes<mode>_lane<Vel>"
  [(set (match_operand:V12DIF 0 "register_operand" "=w")
        (unspec:V12DIF [
                (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q")
                (match_operand:V12DIF 2 "register_operand" "0")
                (match_operand:SI 3 "immediate_operand" "i")]
                UNSPEC_LDAP1_LANE))]
  "TARGET_RCPC3"
  {
    operands[3] = aarch64_endian_lane_rtx (<MODE>mode,
                                           INTVAL (operands[3]));
    return "ldap1\\t{%S0.<Vetype>}[%3], %1";
  }
  [(set_attr "type" "neon_load2_one_lane")]
;; Intrinsic expander for LDAP1: build a BLKmode MEM of element size from the
;; base address in operand 1, range-check the lane index, then emit the
;; insn above.
(define_expand "aarch64_vec_ldap1_lane<mode>"
  [(match_operand:V12DIF 0 "register_operand")
        (match_operand:DI 1 "register_operand")
        (match_operand:V12DIF 2 "register_operand")
        (match_operand:SI 3 "immediate_operand")]
  "TARGET_RCPC3"
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
  aarch64_simd_lane_bounds (operands[3], 0,
                            GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
  emit_insn (gen_aarch64_vec_ldap1_lanes<mode>_lane<Vel> (operands[0],
                                mem, operands[2], operands[3]));
  DONE;
;; Reverse the elements within each vector of a register list, used by the
;; big-endian vec_load_lanes/vec_store_lanes expanders above.  Operand 2 is
;; the byte-permute mask (from aarch64_reverse_mask).  The pattern stays as
;; an unspec until after reload, then splits into one TBL per constituent
;; V16QI register; the earlyclobber output keeps it disjoint from the mask.
(define_insn_and_split "aarch64_rev_reglist<mode>"
[(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
        (unspec:VSTRUCT_QD
                   [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
                    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  int i;
  int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
;; Reload patterns for AdvSIMD register list operands.
;; Move expander for single-vector-sized (16-byte) and D-register tuple
;; modes.  Storing a zero constant of a 16-byte mode is retargeted to a
;; TImode store so it can use GPR xzr stores; otherwise memory destinations
;; force the source into a register before reload.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
        (match_operand:VSTRUCT_QD 1 "general_operand"))]
  "TARGET_FLOAT"
  if (known_eq (GET_MODE_SIZE (<MODE>mode), 16)
      && operands[1] == CONST0_RTX (<MODE>mode)
      && MEM_P (operands[0])
      && (can_create_pseudo_p ()
          || memory_address_p (TImode, XEXP (operands[0], 0))))
    {
      operands[0] = adjust_address (operands[0], TImode, 0);
      operands[1] = CONST0_RTX (TImode);
    }
  else if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
;; Move expander for the opaque struct modes (OI/CI/XI); non-register
;; destinations force the source into a register before reload.
(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
        (match_operand:VSTRUCT 1 "general_operand"))]
  "TARGET_FLOAT"
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
;; Move expander for V8DI (eight GPR-sized elements); legalise mem-to-mem
;; moves by forcing the source into registers.
(define_expand "movv8di"
  [(set (match_operand:V8DI 0 "nonimmediate_operand")
        (match_operand:V8DI 1 "general_operand"))]
  ""
  if (can_create_pseudo_p () && MEM_P (operands[0]))
    operands[1] = force_reg (V8DImode, operands[1]);
;; Intrinsic expander for vld1_x3 family: wrap the base address in a MEM of
;; the tuple mode and emit the LD1 three-register pattern below.
(define_expand "@aarch64_ld1x3<vstruct_elt>"
  [(match_operand:VSTRUCT_3QD 0 "register_operand")
   (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
  emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
  DONE;
;; LD1 of three consecutive registers (no de-interleaving).
(define_insn "aarch64_ld1_x3_<vstruct_elt>"
  [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
        (unspec:VSTRUCT_3QD
          [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
;; Intrinsic expander for vld1_x4 family.
(define_expand "@aarch64_ld1x4<vstruct_elt>"
  [(match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
  emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
  DONE;
;; LD1 of four consecutive registers (no de-interleaving).
(define_insn "aarch64_ld1_x4_<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
        (unspec:VSTRUCT_4QD
          [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
        UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
;; Intrinsic expander for vst1_x2 family: wrap the base address in a MEM of
;; the tuple mode and emit the LD1/ST1-style two-register store below.
(define_expand "@aarch64_st1x2<vstruct_elt>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VSTRUCT_2QD 1 "register_operand")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
  emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
  DONE;
;; ST1 of two consecutive registers (no interleaving).
(define_insn "aarch64_st1_x2_<vstruct_elt>"
  [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:VSTRUCT_2QD
                [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
                UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_2reg<q>")]
;; Intrinsic expander for vst1_x3 family.
(define_expand "@aarch64_st1x3<vstruct_elt>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VSTRUCT_3QD 1 "register_operand")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
  emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
  DONE;
;; ST1 of three consecutive registers (no interleaving).
(define_insn "aarch64_st1_x3_<vstruct_elt>"
  [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:VSTRUCT_3QD
                [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
                UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_3reg<q>")]
;; Intrinsic expander for vst1_x4 family.
(define_expand "@aarch64_st1x4<vstruct_elt>"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:VSTRUCT_4QD 1 "register_operand" "")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
  emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
  DONE;
;; ST1 of four consecutive registers (no interleaving).
(define_insn "aarch64_st1_x4_<vstruct_elt>"
  [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:VSTRUCT_4QD
                [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
                UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_4reg<q>")]
;; GPR-based V8DI move (reg-reg, reg-to-mem, mem-to-reg).  Always emitted as
;; "#" and decomposed by the V8DI define_split after reload; the length
;; attribute reflects the worst-case number of component moves.
(define_insn "*aarch64_movv8di"
  [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
        (match_operand:V8DI 1 "general_operand" " r,r,m"))]
  "(register_operand (operands[0], V8DImode)
    || register_operand (operands[1], V8DImode))"
  "#"
  [(set_attr "type" "multiple,multiple,multiple")
   (set_attr "length" "32,16,16")]
;; Element-ordered single-register load (LD1), used on big-endian where a
;; plain LDR would give the wrong lane layout.
(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
        (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
                             "aarch64_simd_struct_operand" "Utv")]
        UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
;; Element-ordered single-register store (ST1), big-endian counterpart of
;; the pattern above.
(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
        UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
;; Move insn for pairs of D registers: reg-reg moves are split after reload
;; ("#"), memory moves use STP/LDP of the two 64-bit halves.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
        (match_operand:VSTRUCT_2D 1 "general_operand"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  {@ [ cons: =0 , 1 ; attrs: type , length ]
     [ w        , w ; multiple    , 8      ] #
     [ m        , w ; neon_stp    , 4      ] stp\t%d1, %R1, %0
     [ w        , m ; neon_ldp    , 4      ] ldp\t%d0, %R0, %1
  }
;; Move insn for pairs of Q registers: reg-reg split after reload, memory
;; moves use Q-register STP/LDP.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
        (match_operand:VSTRUCT_2Q 1 "general_operand"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
     [ w        , w ; multiple    , simd , 8      ] #
     [ m        , w ; neon_stp_q  , *    , 4      ] stp\t%q1, %R1, %0
     [ w        , m ; neon_ldp_q  , *    , 4      ] ldp\t%q0, %R0, %1
  }
;; Same shape for the legacy OImode (two Q registers) struct mode.
(define_insn "*aarch64_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand")
        (match_operand:OI 1 "general_operand"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
     [ w        , w ; multiple    , simd , 8      ] #
     [ m        , w ; neon_stp_q  , *    , 4      ] stp\t%q1, %R1, %0
     [ w        , m ; neon_ldp_q  , *    , 4      ] ldp\t%q0, %R0, %1
  }
;; Triple-register tuples: always "#", decomposed by the 3QD define_split.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
        (match_operand:VSTRUCT_3QD 1 "general_operand"      " w,w,o"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "arch" "fp<q>,*,*")
   (set_attr "length" "12,8,8")]
;; Legacy CImode (three Q registers): same treatment as 3QD above.
(define_insn "*aarch64_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
        (match_operand:CI 1 "general_operand"      " w,w,o"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "arch" "simd,*,*")
   (set_attr "length" "12,8,8")]
;; Quad-register tuples: always "#", decomposed by the 4QD define_split.
(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
        (match_operand:VSTRUCT_4QD 1 "general_operand"      " w,w,o"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "arch" "fp<q>,*,*")
   (set_attr "length" "16,8,8")]
;; Legacy XImode (four Q registers): same treatment as 4QD above.
(define_insn "*aarch64_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
        (match_operand:XI 1 "general_operand"      " w,w,o"))]
  "TARGET_FLOAT
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "arch" "simd,*,*")
   (set_attr "length" "16,8,8")]
;; Split a 2-vector reg-reg tuple move into per-vector moves after reload
;; (aarch64_simd_emit_reg_reg_move handles overlap-safe ordering).
(define_split
  [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
        (match_operand:VSTRUCT_2QD 1 "register_operand"))]
  "TARGET_FLOAT && reload_completed"
  [(const_int 0)]
  aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
  DONE;
;; As above for the legacy OImode tuple, moved as two TImode pieces.
(define_split
  [(set (match_operand:OI 0 "register_operand")
        (match_operand:OI 1 "register_operand"))]
  "TARGET_FLOAT && reload_completed"
  [(const_int 0)]
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
;; Split a 3-vector tuple move: reg-reg uses per-vector moves; memory moves
;; become one pair move for the first two vectors plus a single-vector move
;; for the third.  elt_size distinguishes the Q (16-byte) and D (8-byte)
;; variants when picking the pair mode.
(define_split
  [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
        (match_operand:VSTRUCT_3QD 1 "general_operand"))]
  "TARGET_FLOAT && reload_completed"
  [(const_int 0)]
  if (register_operand (operands[0], <MODE>mode)
      && register_operand (operands[1], <MODE>mode))
    aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
  else
    {
      int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
      machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
      emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
                                           <MODE>mode, 0),
                      simplify_gen_subreg (pair_mode, operands[1],
                                           <MODE>mode, 0));
      emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
                                   simplify_gen_subreg (<VSTRUCT_ELT>mode,
                                                        operands[0],
                                                        <MODE>mode,
                                                        2 * elt_size)),
                      gen_lowpart (<VSTRUCT_ELT>mode,
                                   simplify_gen_subreg (<VSTRUCT_ELT>mode,
                                                        operands[1],
                                                        <MODE>mode,
                                                        2 * elt_size)));
    }
  DONE;
;; CImode (three Q registers) analogue of the 3QD split: OImode pair move
;; followed by a single 16-byte move at offset 32.
(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
        (match_operand:CI 1 "general_operand"))]
  "TARGET_FLOAT && reload_completed"
  [(const_int 0)]
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
  else
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
                      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
                                   simplify_gen_subreg (TImode, operands[0],
                                                        CImode, 32)),
                      gen_lowpart (V16QImode,
                                   simplify_gen_subreg (TImode, operands[1],
                                                        CImode, 32)));
    }
  DONE;
;; Split a 4-vector tuple move: reg-reg uses per-vector moves; memory moves
;; become two pair moves.
(define_split
  [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
        (match_operand:VSTRUCT_4QD 1 "general_operand"))]
  "TARGET_FLOAT && reload_completed"
  [(const_int 0)]
  if (register_operand (operands[0], <MODE>mode)
      && register_operand (operands[1], <MODE>mode))
    aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
  else
    {
      int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
      machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
      emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
                                           <MODE>mode, 0),
                      simplify_gen_subreg (pair_mode, operands[1],
                                           <MODE>mode, 0));
      emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
                                           <MODE>mode, 2 * elt_size),
                      simplify_gen_subreg (pair_mode, operands[1],
                                           <MODE>mode, 2 * elt_size));
    }
  DONE;
;; XImode (four Q registers) analogue: two OImode moves.
(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
        (match_operand:XI 1 "general_operand"))]
  "TARGET_FLOAT && reload_completed"
  [(const_int 0)]
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
  else
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
                      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
                      simplify_gen_subreg (OImode, operands[1], XImode, 32));
    }
  DONE;
;; Split a V8DI move after reload.  Reg-reg moves go through
;; aarch64_simd_emit_reg_reg_move (DImode x 8).  Reg<->mem moves are
;; decomposed into DImode or TImode pieces; a piece whose destination
;; register overlaps the source address is deferred and emitted last so the
;; address is not clobbered before the remaining loads use it.
(define_split
  [(set (match_operand:V8DI 0 "nonimmediate_operand")
        (match_operand:V8DI 1 "general_operand"))]
  "reload_completed"
  [(const_int 0)]
  if (register_operand (operands[0], V8DImode)
      && register_operand (operands[1], V8DImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
      DONE;
    }
  else if ((register_operand (operands[0], V8DImode)
            && memory_operand (operands[1], V8DImode))
           || (memory_operand (operands[0], V8DImode)
               && register_operand (operands[1], V8DImode)))
    {
      /* V8DI only guarantees 8-byte alignment, whereas TImode requires 16.  */
      auto mode = STRICT_ALIGNMENT ? DImode : TImode;
      int increment = GET_MODE_SIZE (mode);
      std::pair<rtx, rtx> last_pair = {};
      for (int offset = 0; offset < 64; offset += increment)
        {
          std::pair<rtx, rtx> pair = {
            simplify_gen_subreg (mode, operands[0], V8DImode, offset),
            simplify_gen_subreg (mode, operands[1], V8DImode, offset)
          };
          if (register_operand (pair.first, mode)
              && reg_overlap_mentioned_p (pair.first, pair.second))
            last_pair = pair;
          else
            emit_move_insn (pair.first, pair.second);
        }
      if (last_pair.first)
        emit_move_insn (last_pair.first, last_pair.second);
      DONE;
    }
  else
    FAIL;
;; Intrinsic expander for vld<n>_dup: build a BLKmode MEM covering one
;; <nregs>-element structure and emit the matching LD<n>R pattern.
(define_expand "aarch64_ld<nregs>r<vstruct_elt>"
  [(match_operand:VSTRUCT_QD 0 "register_operand")
   (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
  emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
  DONE;
;; D-register structure loads for the intrinsics.  Multi-element modes
;; (VSTRUCT_*DNX) use the de-interleaving LD2/LD3/LD4; the single-element
;; 64-bit modes (VSTRUCT_*DX, e.g. DI/DF) have nothing to de-interleave and
;; use LD1 of .1d registers instead.
(define_insn "aarch64_ld2<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
        (unspec:VSTRUCT_2DNX [
          (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
;; Single-element variant: plain LD1 of two .1d registers.
(define_insn "aarch64_ld2<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
        (unspec:VSTRUCT_2DX [
          (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
(define_insn "aarch64_ld3<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
        (unspec:VSTRUCT_3DNX [
          (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
;; Single-element variant: plain LD1 of three .1d registers.
(define_insn "aarch64_ld3<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
        (unspec:VSTRUCT_3DX [
          (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
(define_insn "aarch64_ld4<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
        (unspec:VSTRUCT_4DNX [
          (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
;; Single-element variant: plain LD1 of four .1d registers.
(define_insn "aarch64_ld4<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
        (unspec:VSTRUCT_4DX [
          (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
          UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
;; Intrinsic expander for vld<n> on D-register modes: route through the
;; _dreg patterns above.
(define_expand "@aarch64_ld<nregs><vstruct_elt>"
 [(match_operand:VSTRUCT_D 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
  emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
  DONE;
;; Intrinsic expander for vld1: a plain move on little-endian, the
;; element-ordered aarch64_be_ld1 on big-endian.
(define_expand "@aarch64_ld1<VALL_F16:mode>"
 [(match_operand:VALL_F16 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);
  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
;; Intrinsic expander for vld<n>q: route through the Q-register LD<n>
;; structure-load patterns.
(define_expand "@aarch64_ld<nregs><vstruct_elt>"
 [(match_operand:VSTRUCT_Q 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
  emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
  DONE;
;; Intrinsic expander for vld1_x2: LD1 of two consecutive registers.
(define_expand "@aarch64_ld1x2<vstruct_elt>"
 [(match_operand:VSTRUCT_2QD 0 "register_operand")
  (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
  machine_mode mode = <MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);
  emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
  DONE;
;; Intrinsic expander for vld<n>_lane: build a BLKmode MEM covering one
;; structure, bounds-check the lane index, then emit the lane-load pattern.
(define_expand "@aarch64_ld<nregs>_lane<vstruct_elt>"
  [(match_operand:VSTRUCT_QD 0 "register_operand")
        (match_operand:DI 1 "register_operand")
        (match_operand:VSTRUCT_QD 2 "register_operand")
        (match_operand:SI 3 "immediate_operand")]
  "TARGET_SIMD"
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
  aarch64_simd_lane_bounds (operands[3], 0,
                GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
                                mem, operands[2], operands[3]));
  DONE;
;; Permuted-store expanders for neon intrinsics.
;; Permute instructions
;; vec_perm support
;; Standard vec_perm pattern for byte vectors; the selector in operand 3 is
;; a register (variable permute), lowered by aarch64_expand_vec_perm.
(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
  aarch64_expand_vec_perm (operands[0], operands[1],
                           operands[2], operands[3], <nunits>);
  DONE;
;; Single-source table lookup (TBL); out-of-range indices yield zero.
(define_insn "aarch64_qtbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
                    (match_operand:VB 2 "register_operand" "w")]
                   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
;; Single-source table extension (TBX); out-of-range indices keep the
;; corresponding byte of operand 1 (tied to the output).
(define_insn "aarch64_qtbx1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "0")
                    (match_operand:V16QI 2 "register_operand" "w")
                    (match_operand:VB 3 "register_operand" "w")]
                   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
;; Two source registers.
(define_insn "aarch64_qtbl2<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
                      (match_operand:VB 2 "register_operand" "w")]
                      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl2")]
(define_insn "aarch64_qtbx2<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "0")
                      (match_operand:V2x16QI 2 "register_operand" "w")
                      (match_operand:VB 3 "register_operand" "w")]
                      UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl2")]
;; Three source registers.
(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
                      (match_operand:VB 2 "register_operand" "w")]
                      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
8576 (define_insn "aarch64_qtbx3<mode>"
8577   [(set (match_operand:VB 0 "register_operand" "=w")
8578         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8579                       (match_operand:V3x16QI 2 "register_operand" "w")
8580                       (match_operand:VB 3 "register_operand" "w")]
8581                       UNSPEC_TBX))]
8582   "TARGET_SIMD"
8583   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8584   [(set_attr "type" "neon_tbl3")]
8587 ;; Four source registers.
8589 (define_insn "aarch64_qtbl4<mode>"
8590   [(set (match_operand:VB 0 "register_operand" "=w")
8591         (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
8592                       (match_operand:VB 2 "register_operand" "w")]
8593                       UNSPEC_TBL))]
8594   "TARGET_SIMD"
8595   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8596   [(set_attr "type" "neon_tbl4")]
8599 (define_insn "aarch64_qtbx4<mode>"
8600   [(set (match_operand:VB 0 "register_operand" "=w")
8601         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8602                       (match_operand:V4x16QI 2 "register_operand" "w")
8603                       (match_operand:VB 3 "register_operand" "w")]
8604                       UNSPEC_TBX))]
8605   "TARGET_SIMD"
8606   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8607   [(set_attr "type" "neon_tbl4")]
;; Concatenate two V16QI registers into a V2x16QI tuple.  Emitted as "#"
;; and always split ("&& 1") after reload into the moves produced by
;; aarch64_split_combinev16qi.
(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:V2x16QI 0 "register_operand" "=w")
	(unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
			 (match_operand:V16QI 2 "register_operand" "w")]
			UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& 1"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
[(set_attr "type" "multiple")]
)
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; Covers the fixed permutes ZIP1/ZIP2, UZP1/UZP2, TRN1/TRN2 via the
;; PERMUTE iterator; <vczle>/<vczbe> add the implicit-zeroing variants.
(define_insn "@aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")]
	 PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)
;; ZIP1 ignores the contents of the upper halves of the registers,
;; so we can describe 128-bit operations in terms of 64-bit inputs.
(define_insn "aarch64_zip1<mode>_low"
  [(set (match_operand:VQ 0 "register_operand" "=w")
	(unspec:VQ [(match_operand:<VHALF> 1 "register_operand" "w")
		    (match_operand:<VHALF> 2 "register_operand" "w")]
		   UNSPEC_ZIP1))]
  "TARGET_SIMD"
  "zip1\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute_q")]
)
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index not a byte index.
(define_insn "@aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
			  (match_operand:VALL_F16 2 "register_operand" "w")
			  (match_operand:SI 3 "immediate_operand" "i")]
	 UNSPEC_EXT))]
  "TARGET_SIMD"
{
  ;; EXT takes a byte offset, so scale the lane index by the element size
  ;; before printing.
  operands[3] = GEN_INT (INTVAL (operands[3])
      * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)
;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
;; Element reversal within containers (REV16/REV32/REV64 via REVERSE).
(define_insn "@aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
		    REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)
;; Interleaved stores of 64-bit vector tuples.  The *DNX variants hold
;; multi-element 64-bit vectors and use ST2/ST3/ST4; the *DX variants hold
;; single-element 64-bit vectors (.1d), for which the interleaving forms do
;; not apply, so an equivalent multi-register ST1 is emitted instead.

(define_insn "aarch64_st2<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_2DNX [
		(match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
		UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

(define_insn "aarch64_st2<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_2DX [
		(match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
		UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

(define_insn "aarch64_st3<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_3DNX [
		(match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
		UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

(define_insn "aarch64_st3<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_3DX [
		(match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
		UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

(define_insn "aarch64_st4<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_4DNX [
		(match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
		UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

(define_insn "aarch64_st4<vstruct_elt>_dreg"
  [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
	(unspec:VSTRUCT_4DX [
		(match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
		UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)
;; Store expanders used by the intrinsics.  Operand 0 is the pointer,
;; operand 1 the data to store.  The two @aarch64_st<nregs> expanders share
;; a name but are distinguished by their mode iterators (64-bit vs 128-bit
;; tuples) and funnel into the _dreg and simd_st patterns respectively.

(define_expand "@aarch64_st<nregs><vstruct_elt>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT_D 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
  emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
  DONE;
})

(define_expand "@aarch64_st<nregs><vstruct_elt>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT_Q 1 "register_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
  emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
  DONE;
})

;; Store one lane of each vector in a tuple (st2/3/4_lane intrinsics);
;; operand 2 is the lane index, range-checked at expand time.
(define_expand "@aarch64_st<nregs>_lane<vstruct_elt>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VSTRUCT_QD 1 "register_operand")
  (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  ;; Scattered-element access: use a BLKmode MEM with an explicit size.
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);

  aarch64_simd_lane_bounds (operands[2], 0,
		GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
  emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
					operands[1], operands[2]));
  DONE;
})

;; Single-vector store.  On big-endian a dedicated ST1 pattern keeps the
;; architectural (array) element order; little-endian is a plain move.
(define_expand "@aarch64_st1<VALL_F16:mode>"
 [(match_operand:DI 0 "register_operand")
  (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})
;; Standard pattern name vec_init<mode><Vel>.

;; Initialize a vector from individual elements; operand 1 is a PARALLEL
;; of element values, handled entirely by the target expander.
(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

;; As above, but initializing a 128-bit vector from two 64-bit halves.
(define_expand "vec_init<mode><Vhalf>"
  [(match_operand:VQ_NO2E 0 "register_operand")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})
;; Load a scalar from memory and replicate it to every lane (LD1R).
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
	(vec_duplicate:VALL_F16
	  (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)

;; Contiguous load of two vectors into a register pair (LD1 x2 form,
;; no de-interleaving).
(define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
  [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
	(unspec:VSTRUCT_2QD [
	    (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
	    UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)
;; Floating-point reciprocal estimate (vector and scalar forms).
(define_insn "@aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
	 UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

;; Floating-point reciprocal exponent (scalar only).
(define_insn "aarch64_frecpx<mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
	(unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
	 UNSPEC_FRECPX))]
  "TARGET_SIMD"
  "frecpx\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)

;; Floating-point reciprocal step, used in Newton-Raphson refinement.
(define_insn "@aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
	(unspec:VHSDF_HSDF
	  [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
	  (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
	  UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)

;; Unsigned integer reciprocal estimate.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
	(unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
		UNSPEC_URECPE))]
 "TARGET_SIMD"
 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
;; Standard pattern name vec_extract<mode><Vel>.

;; Extract a single element at immediate lane index operand 2.
(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
    emit_insn
      (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
    DONE;
})

;; Extract a 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><Vhalf>"
  [(match_operand:<VHALF> 0 "register_operand")
   (match_operand:VQMOV_NO2E 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  ;; Operand 2 selects the low (0) or high (1) half; turn it into a
  ;; stepped lane-selection PARALLEL for aarch64_get_half.
  int start = INTVAL (operands[2]);
  gcc_assert (start == 0 || start == 1);
  start *= <nunits> / 2;
  rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
  emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
  DONE;
})

;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
(define_expand "vec_extract<mode><V1half>"
  [(match_operand:<V1HALF> 0 "register_operand")
   (match_operand:VQ_2E 1 "register_operand")
   (match_operand 2 "immediate_operand")]
  "TARGET_SIMD"
{
  /* V1DI and V1DF are rarely used by other patterns, so it should be better
     to hide it in a subreg destination of a normal DI or DF op.  */
  rtx scalar0 = gen_lowpart (<VHALF>mode, operands[0]);
  emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[1], operands[2]));
  DONE;
})
;; aes

;; AESE/AESD round.  The architectural operation XORs state and round key
;; first, which is modelled explicitly; the "%0" constraint makes the XOR
;; commutative with the tied operand.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
		[(xor:V16QI
		 (match_operand:V16QI 1 "register_operand" "%0")
		 (match_operand:V16QI 2 "register_operand" "w"))]
	 CRYPTO_AES))]
  "TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; AESMC/AESIMC mix-columns step.
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
	 CRYPTO_AESMC))]
  "TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")]
)
;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves inbetween.
;;  Mash the two together during combine.

(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(unspec:V16QI
	   [(xor:V16QI
		(match_operand:V16QI 1 "register_operand" "%0")
		(match_operand:V16QI 2 "register_operand" "w"))]
	     UNSPEC_AESE)]
	UNSPEC_AESMC))]
  "TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves inbetween.
;;  Mash the two together during combine.

(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
	(unspec:V16QI
	  [(unspec:V16QI
		    [(xor:V16QI
			(match_operand:V16QI 1 "register_operand" "%0")
			(match_operand:V16QI 2 "register_operand" "w"))]
		UNSPEC_AESD)]
	  UNSPEC_AESIMC))]
  "TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
;; sha1

;; SHA1H on a plain SI value.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(match_operand:SI 1
		       "register_operand" "w")]
	 UNSPEC_SHA1H))]
  "TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1H taking its input from lane 0 of a V4SI; the lane number of the
;; architectural low element differs by endianness, hence two patterns.
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 0)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
	(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
		     (parallel [(const_int 3)]))]
	 UNSPEC_SHA1H))]
  "TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1 schedule update 1; operand 1 is tied to the accumulating output.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SHA1SU1))]
  "TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

;; SHA1 hash update (SHA1C/SHA1M/SHA1P via CRYPTO_SHA1).
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SHA1))]
  "TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

;; SHA1 schedule update 0.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SHA1SU0))]
  "TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
;; sha256

;; SHA256 hash update (SHA256H/SHA256H2 via CRYPTO_SHA256); operand 1 is
;; tied to the output accumulator.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SHA256))]
  "TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; SHA256 schedule update 0.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SHA256SU0))]
  "TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

;; SHA256 schedule update 1.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SHA256SU1))]
  "TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
;; sha512

;; SHA512 hash update (SHA512H/SHA512H2 via CRYPTO_SHA512).
(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")
		      (match_operand:V2DI 3 "register_operand" "w")]
	 CRYPTO_SHA512))]
  "TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; SHA512 schedule update 0.
(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")]
	 UNSPEC_SHA512SU0))]
  "TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; SHA512 schedule update 1.
(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
		      (match_operand:V2DI 2 "register_operand" "w")
		      (match_operand:V2DI 3 "register_operand" "w")]
	 UNSPEC_SHA512SU1))]
  "TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)
;; sha3

;; Three-way XOR, expressed with generic XOR rtl so combine can form it.
(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (xor:VQ_I
	  (match_operand:VQ_I 2 "register_operand" "w")
	  (match_operand:VQ_I 3 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; Rotate-by-one-and-XOR.
(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(xor:V2DI
	 (rotate:V2DI
	  (match_operand:V2DI 2 "register_operand" "w")
	  (const_int 1))
	 (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

;; XOR-and-rotate.  The rtl uses a left rotate; XAR's immediate is a right
;; rotate, so the output code converts the amount (64 - left-amount).
(define_insn "*aarch64_xarqv2di_insn"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
	(rotate:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand" "%w")
	  (match_operand:V2DI 2 "register_operand" "w"))
	 (match_operand:V2DI 3 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SHA3"
  {
    operands[3]
      = GEN_INT (64 - INTVAL (unwrap_const_vec_duplicate (operands[3])));
    return "xar\\t%0.2d, %1.2d, %2.2d, %3";
  }
  [(set_attr "type" "crypto_sha3")]
)

;; The semantics of the vxarq_u64 intrinsics treat the immediate argument as a
;; right-rotate amount but the recommended representation of rotates by a
;; constant in RTL is with the left ROTATE code.  Translate between the
;; intrinsic-provided amount and the RTL operands in the expander here.
;; The define_insn for XAR will translate back to instruction semantics in its
;; output logic.
(define_expand "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand")
	(rotate:V2DI
	 (xor:V2DI
	  (match_operand:V2DI 1 "register_operand")
	  (match_operand:V2DI 2 "register_operand"))
	 (match_operand:SI 3 "aarch64_simd_shift_imm_di")))]
  "TARGET_SHA3"
  {
    operands[3]
      = aarch64_simd_gen_const_vector_dup (V2DImode,
					   64 - INTVAL (operands[3]));
  }
)

;; Bit-clear and XOR: op1 ^ (op2 & ~op3), expressed with generic rtl.
(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
	(xor:VQ_I
	 (and:VQ_I
	  (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
	  (match_operand:VQ_I 2 "register_operand" "w"))
	 (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)
;; SM3

;; SM3SS1: three-operand rotate/add helper for the SM3 compression function.
(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 UNSPEC_SM3SS1))]
  "TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

;; SM3TT1A/1B/2A/2B via CRYPTO_SM3TT; operand 4 is the 2-bit lane immediate.
(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")
		      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
	 CRYPTO_SM3TT))]
  "TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

;; SM3PARTW1/PARTW2 message-schedule helpers.
(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")
		      (match_operand:V4SI 3 "register_operand" "w")]
	 CRYPTO_SM3PART))]
  "TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)
;; SM4

;; SM4E round; operand 1 is the tied state accumulator.
(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4E))]
  "TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; SM4EKEY key-schedule step.
(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
	(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
		      (match_operand:V4SI 2 "register_operand" "w")]
	 UNSPEC_SM4EKEY))]
  "TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)
;; fp16fml

;; FMLAL/FMLSL widening multiply-accumulate expanders.  The _low forms
;; operate on the low halves of the HF inputs, the _high forms on the high
;; halves; the half-selection PARALLELs (p1, p2) are built here and passed
;; to the aarch64_simd_fml* insns below.
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
					    <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
								operands[1],
								operands[2],
								operands[3],
								p1, p2));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand")
	(unspec:VDQSF
	 [(match_operand:VDQSF 1 "register_operand")
	  (match_operand:<VFMLA_W> 2 "register_operand")
	  (match_operand:<VFMLA_W> 3 "register_operand")]
	 VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
								 operands[1],
								 operands[2],
								 operands[3],
								 p1, p2));
  DONE;
})
;; FMLAL/FMLSL insns, modelled as an FMA of float-extended half-vector
;; selections; the subtraction variants negate the first multiplicand.
;; Operands 4 and 5 are the half-selection PARALLELs created by the
;; expanders above; operand 1 is the tied accumulator.

(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 2 "register_operand" "w")
	   (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
	(fma:VDQSF
	 (float_extend:VDQSF
	  (neg:<VFMLA_SEL_W>
	   (vec_select:<VFMLA_SEL_W>
	    (match_operand:<VFMLA_W> 2 "register_operand" "w")
	    (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
	 (float_extend:VDQSF
	  (vec_select:<VFMLA_SEL_W>
	   (match_operand:<VFMLA_W> 3 "register_operand" "w")
	   (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
	 (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)
;; Expander for the by-lane FMLAL/FMLSL (low) V2SF intrinsics.  Builds the
;; lo-half selection parallel and the endian-corrected lane index, then
;; hands off to the matching define_insn.  f16mac1 iterates over the
;; add/subtract variants via the VFMLA16_LOW unspec iterator.
9359 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
9360   [(set (match_operand:V2SF 0 "register_operand")
9361         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9362                            (match_operand:V4HF 2 "register_operand")
9363                            (match_operand:V4HF 3 "register_operand")
9364                            (match_operand:SI 4 "aarch64_imm2")]
9365          VFMLA16_LOW))]
9366   "TARGET_F16FML"
9368     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9369     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9371     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
9372                                                             operands[1],
9373                                                             operands[2],
9374                                                             operands[3],
9375                                                             p1, lane));
9376     DONE;
;; High-half counterpart of the expander above: identical except the
;; parallel selects the high half (third argument true) and VFMLA16_HIGH
;; maps to the fmlal2/fmlsl2 insns.
9380 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
9381   [(set (match_operand:V2SF 0 "register_operand")
9382         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9383                            (match_operand:V4HF 2 "register_operand")
9384                            (match_operand:V4HF 3 "register_operand")
9385                            (match_operand:SI 4 "aarch64_imm2")]
9386          VFMLA16_HIGH))]
9387   "TARGET_F16FML"
9389     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9390     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9392     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
9393                                                              operands[1],
9394                                                              operands[2],
9395                                                              operands[3],
9396                                                              p1, lane));
9397     DONE;
;; By-lane FMLAL (low, V2SF): the second multiplicand is a single fp16 lane
;; of operand 3 (vec_select of lane %5, then vec_duplicate across the
;; vector).  Operand 3 uses the "x" constraint (V0-V15) as required by the
;; by-element instruction encoding.
9400 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
9401   [(set (match_operand:V2SF 0 "register_operand" "=w")
9402         (fma:V2SF
9403          (float_extend:V2SF
9404            (vec_select:V2HF
9405             (match_operand:V4HF 2 "register_operand" "w")
9406             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9407          (float_extend:V2SF
9408            (vec_duplicate:V2HF
9409             (vec_select:HF
9410              (match_operand:V4HF 3 "register_operand" "x")
9411              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9412          (match_operand:V2SF 1 "register_operand" "0")))]
9413   "TARGET_F16FML"
9414   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9415   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLSL (low, V2SF): as above with the first multiplicand negated
;; to express the subtraction from the accumulator.
9418 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
9419   [(set (match_operand:V2SF 0 "register_operand" "=w")
9420         (fma:V2SF
9421          (float_extend:V2SF
9422           (neg:V2HF
9423            (vec_select:V2HF
9424             (match_operand:V4HF 2 "register_operand" "w")
9425             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9426          (float_extend:V2SF
9427           (vec_duplicate:V2HF
9428            (vec_select:HF
9429             (match_operand:V4HF 3 "register_operand" "x")
9430             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9431          (match_operand:V2SF 1 "register_operand" "0")))]
9432   "TARGET_F16FML"
9433   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9434   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLAL2 (high, V2SF): high-half selection of operand 2.
9437 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
9438   [(set (match_operand:V2SF 0 "register_operand" "=w")
9439         (fma:V2SF
9440          (float_extend:V2SF
9441            (vec_select:V2HF
9442             (match_operand:V4HF 2 "register_operand" "w")
9443             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9444          (float_extend:V2SF
9445            (vec_duplicate:V2HF
9446             (vec_select:HF
9447              (match_operand:V4HF 3 "register_operand" "x")
9448              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9449          (match_operand:V2SF 1 "register_operand" "0")))]
9450   "TARGET_F16FML"
9451   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9452   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLSL2 (high, V2SF): high-half selection plus NEG for subtract.
9455 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
9456   [(set (match_operand:V2SF 0 "register_operand" "=w")
9457         (fma:V2SF
9458          (float_extend:V2SF
9459            (neg:V2HF
9460             (vec_select:V2HF
9461              (match_operand:V4HF 2 "register_operand" "w")
9462              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9463          (float_extend:V2SF
9464            (vec_duplicate:V2HF
9465             (vec_select:HF
9466              (match_operand:V4HF 3 "register_operand" "x")
9467              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9468          (match_operand:V2SF 1 "register_operand" "0")))]
9469   "TARGET_F16FML"
9470   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9471   [(set_attr "type" "neon_fp_mul_s")]
;; Expanders for the 128-bit by-laneq FMLAL/FMLSL V4SF intrinsics: both
;; multiplicands are V8HF, so the lane index is 0-7 (aarch64_lane_imm3)
;; and the half-selection parallel is built over 8 elements.
9474 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
9475   [(set (match_operand:V4SF 0 "register_operand")
9476         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9477                            (match_operand:V8HF 2 "register_operand")
9478                            (match_operand:V8HF 3 "register_operand")
9479                            (match_operand:SI 4 "aarch64_lane_imm3")]
9480          VFMLA16_LOW))]
9481   "TARGET_F16FML"
9483     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9484     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9486     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
9487                                                               operands[1],
9488                                                               operands[2],
9489                                                               operands[3],
9490                                                               p1, lane));
9491     DONE;
;; High-half counterpart: selects the high half of operand 2 and maps to
;; the fmlal2/fmlsl2 laneq insns.
9494 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
9495   [(set (match_operand:V4SF 0 "register_operand")
9496         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9497                            (match_operand:V8HF 2 "register_operand")
9498                            (match_operand:V8HF 3 "register_operand")
9499                            (match_operand:SI 4 "aarch64_lane_imm3")]
9500          VFMLA16_HIGH))]
9501   "TARGET_F16FML"
9503     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9504     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9506     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
9507                                                                operands[1],
9508                                                                operands[2],
9509                                                                operands[3],
9510                                                                p1, lane));
9511     DONE;
;; By-laneq FMLAL (low, V4SF): low half of V8HF operand 2 times fp16 lane
;; %5 of V8HF operand 3 (lane duplicated across the vector), accumulated
;; into V4SF operand 1.  "x" restricts the by-element register to V0-V15.
9514 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
9515   [(set (match_operand:V4SF 0 "register_operand" "=w")
9516         (fma:V4SF
9517          (float_extend:V4SF
9518           (vec_select:V4HF
9519             (match_operand:V8HF 2 "register_operand" "w")
9520             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9521          (float_extend:V4SF
9522           (vec_duplicate:V4HF
9523            (vec_select:HF
9524             (match_operand:V8HF 3 "register_operand" "x")
9525             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9526          (match_operand:V4SF 1 "register_operand" "0")))]
9527   "TARGET_F16FML"
9528   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9529   [(set_attr "type" "neon_fp_mul_s")]
;; By-laneq FMLSL (low, V4SF): NEG of the first multiplicand gives the
;; subtract form.
9532 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
9533   [(set (match_operand:V4SF 0 "register_operand" "=w")
9534         (fma:V4SF
9535           (float_extend:V4SF
9536            (neg:V4HF
9537             (vec_select:V4HF
9538              (match_operand:V8HF 2 "register_operand" "w")
9539              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9540          (float_extend:V4SF
9541           (vec_duplicate:V4HF
9542            (vec_select:HF
9543             (match_operand:V8HF 3 "register_operand" "x")
9544             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9545          (match_operand:V4SF 1 "register_operand" "0")))]
9546   "TARGET_F16FML"
9547   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9548   [(set_attr "type" "neon_fp_mul_s")]
;; By-laneq FMLAL2 (high, V4SF): high-half selection of operand 2.
9551 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
9552   [(set (match_operand:V4SF 0 "register_operand" "=w")
9553         (fma:V4SF
9554          (float_extend:V4SF
9555           (vec_select:V4HF
9556             (match_operand:V8HF 2 "register_operand" "w")
9557             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9558          (float_extend:V4SF
9559           (vec_duplicate:V4HF
9560            (vec_select:HF
9561             (match_operand:V8HF 3 "register_operand" "x")
9562             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9563          (match_operand:V4SF 1 "register_operand" "0")))]
9564   "TARGET_F16FML"
9565   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9566   [(set_attr "type" "neon_fp_mul_s")]
;; By-laneq FMLSL2 (high, V4SF): high-half selection plus NEG for subtract.
9569 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
9570   [(set (match_operand:V4SF 0 "register_operand" "=w")
9571         (fma:V4SF
9572          (float_extend:V4SF
9573           (neg:V4HF
9574            (vec_select:V4HF
9575             (match_operand:V8HF 2 "register_operand" "w")
9576             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9577          (float_extend:V4SF
9578           (vec_duplicate:V4HF
9579            (vec_select:HF
9580             (match_operand:V8HF 3 "register_operand" "x")
9581             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9582          (match_operand:V4SF 1 "register_operand" "0")))]
9583   "TARGET_F16FML"
9584   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9585   [(set_attr "type" "neon_fp_mul_s")]
;; Expanders for the mixed-width by-laneq V2SF intrinsics: the vector
;; multiplicand is V4HF but the lane comes from a V8HF register, so the
;; half parallel is over 4 elements while the lane index (0-7) is
;; endian-adjusted in V8HFmode.
9588 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
9589   [(set (match_operand:V2SF 0 "register_operand")
9590         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9591                       (match_operand:V4HF 2 "register_operand")
9592                       (match_operand:V8HF 3 "register_operand")
9593                       (match_operand:SI 4 "aarch64_lane_imm3")]
9594          VFMLA16_LOW))]
9595   "TARGET_F16FML"
9597     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9598     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9600     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
9601                                                              operands[1],
9602                                                              operands[2],
9603                                                              operands[3],
9604                                                              p1, lane));
9605     DONE;
;; High-half counterpart of the expander above.
9609 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
9610   [(set (match_operand:V2SF 0 "register_operand")
9611         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9612                       (match_operand:V4HF 2 "register_operand")
9613                       (match_operand:V8HF 3 "register_operand")
9614                       (match_operand:SI 4 "aarch64_lane_imm3")]
9615          VFMLA16_HIGH))]
9616   "TARGET_F16FML"
9618     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9619     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9621     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
9622                                                               operands[1],
9623                                                               operands[2],
9624                                                               operands[3],
9625                                                               p1, lane));
9626     DONE;
;; By-laneq FMLAL (low, V2SF): lane register is V8HF (laneq form, index
;; 0-7) while the vector multiplicand is the low half of a V4HF.
9630 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
9631   [(set (match_operand:V2SF 0 "register_operand" "=w")
9632         (fma:V2SF
9633          (float_extend:V2SF
9634            (vec_select:V2HF
9635             (match_operand:V4HF 2 "register_operand" "w")
9636             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9637          (float_extend:V2SF
9638           (vec_duplicate:V2HF
9639            (vec_select:HF
9640             (match_operand:V8HF 3 "register_operand" "x")
9641             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9642          (match_operand:V2SF 1 "register_operand" "0")))]
9643   "TARGET_F16FML"
9644   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9645   [(set_attr "type" "neon_fp_mul_s")]
;; By-laneq FMLSL (low, V2SF): subtract form via NEG of the first
;; multiplicand.
9648 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
9649   [(set (match_operand:V2SF 0 "register_operand" "=w")
9650         (fma:V2SF
9651          (float_extend:V2SF
9652           (neg:V2HF
9653            (vec_select:V2HF
9654             (match_operand:V4HF 2 "register_operand" "w")
9655             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9656          (float_extend:V2SF
9657           (vec_duplicate:V2HF
9658            (vec_select:HF
9659             (match_operand:V8HF 3 "register_operand" "x")
9660             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9661          (match_operand:V2SF 1 "register_operand" "0")))]
9662   "TARGET_F16FML"
9663   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9664   [(set_attr "type" "neon_fp_mul_s")]
;; By-laneq FMLAL2 (high, V2SF): high-half selection of operand 2.
9667 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
9668   [(set (match_operand:V2SF 0 "register_operand" "=w")
9669         (fma:V2SF
9670          (float_extend:V2SF
9671            (vec_select:V2HF
9672             (match_operand:V4HF 2 "register_operand" "w")
9673             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9674          (float_extend:V2SF
9675           (vec_duplicate:V2HF
9676            (vec_select:HF
9677             (match_operand:V8HF 3 "register_operand" "x")
9678             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9679          (match_operand:V2SF 1 "register_operand" "0")))]
9680   "TARGET_F16FML"
9681   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9682   [(set_attr "type" "neon_fp_mul_s")]
;; By-laneq FMLSL2 (high, V2SF): high-half selection plus NEG for subtract.
9685 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
9686   [(set (match_operand:V2SF 0 "register_operand" "=w")
9687         (fma:V2SF
9688          (float_extend:V2SF
9689           (neg:V2HF
9690            (vec_select:V2HF
9691             (match_operand:V4HF 2 "register_operand" "w")
9692             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9693          (float_extend:V2SF
9694           (vec_duplicate:V2HF
9695            (vec_select:HF
9696             (match_operand:V8HF 3 "register_operand" "x")
9697             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9698          (match_operand:V2SF 1 "register_operand" "0")))]
9699   "TARGET_F16FML"
9700   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9701   [(set_attr "type" "neon_fp_mul_s")]
;; Expanders for the mixed-width by-lane V4SF intrinsics: the vector
;; multiplicand is V8HF (half parallel over 8 elements) while the lane
;; comes from a V4HF register, so the index is 0-3 (aarch64_imm2) and is
;; endian-adjusted in V4HFmode.
9704 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
9705   [(set (match_operand:V4SF 0 "register_operand")
9706         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9707                       (match_operand:V8HF 2 "register_operand")
9708                       (match_operand:V4HF 3 "register_operand")
9709                       (match_operand:SI 4 "aarch64_imm2")]
9710          VFMLA16_LOW))]
9711   "TARGET_F16FML"
9713     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9714     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9716     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
9717                                                              operands[1],
9718                                                              operands[2],
9719                                                              operands[3],
9720                                                              p1, lane));
9721     DONE;
;; High-half counterpart of the expander above.
9724 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
9725   [(set (match_operand:V4SF 0 "register_operand")
9726         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9727                       (match_operand:V8HF 2 "register_operand")
9728                       (match_operand:V4HF 3 "register_operand")
9729                       (match_operand:SI 4 "aarch64_imm2")]
9730          VFMLA16_HIGH))]
9731   "TARGET_F16FML"
9733     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9734     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9736     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
9737                                                               operands[1],
9738                                                               operands[2],
9739                                                               operands[3],
9740                                                               p1, lane));
9741     DONE;
;; By-lane FMLAL (low, V4SF): lane register is V4HF (lane form, index
;; 0-3) while the vector multiplicand is the low half of a V8HF.
9744 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
9745   [(set (match_operand:V4SF 0 "register_operand" "=w")
9746         (fma:V4SF
9747          (float_extend:V4SF
9748           (vec_select:V4HF
9749            (match_operand:V8HF 2 "register_operand" "w")
9750            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9751          (float_extend:V4SF
9752           (vec_duplicate:V4HF
9753            (vec_select:HF
9754             (match_operand:V4HF 3 "register_operand" "x")
9755             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9756          (match_operand:V4SF 1 "register_operand" "0")))]
9757   "TARGET_F16FML"
9758   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9759   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLSL (low, V4SF): subtract form via NEG of the first
;; multiplicand.
9762 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
9763   [(set (match_operand:V4SF 0 "register_operand" "=w")
9764         (fma:V4SF
9765          (float_extend:V4SF
9766           (neg:V4HF
9767            (vec_select:V4HF
9768             (match_operand:V8HF 2 "register_operand" "w")
9769             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9770          (float_extend:V4SF
9771           (vec_duplicate:V4HF
9772            (vec_select:HF
9773             (match_operand:V4HF 3 "register_operand" "x")
9774             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9775          (match_operand:V4SF 1 "register_operand" "0")))]
9776   "TARGET_F16FML"
9777   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9778   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLAL2 (high, V4SF): high-half selection of operand 2.
9781 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
9782   [(set (match_operand:V4SF 0 "register_operand" "=w")
9783         (fma:V4SF
9784          (float_extend:V4SF
9785           (vec_select:V4HF
9786            (match_operand:V8HF 2 "register_operand" "w")
9787            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9788          (float_extend:V4SF
9789           (vec_duplicate:V4HF
9790            (vec_select:HF
9791             (match_operand:V4HF 3 "register_operand" "x")
9792             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9793          (match_operand:V4SF 1 "register_operand" "0")))]
9794   "TARGET_F16FML"
9795   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9796   [(set_attr "type" "neon_fp_mul_s")]
;; By-lane FMLSL2 (high, V4SF): high-half selection plus NEG for subtract.
9799 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
9800   [(set (match_operand:V4SF 0 "register_operand" "=w")
9801         (fma:V4SF
9802          (float_extend:V4SF
9803           (neg:V4HF
9804            (vec_select:V4HF
9805             (match_operand:V8HF 2 "register_operand" "w")
9806             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9807          (float_extend:V4SF
9808           (vec_duplicate:V4HF
9809            (vec_select:HF
9810             (match_operand:V4HF 3 "register_operand" "x")
9811             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9812          (match_operand:V4SF 1 "register_operand" "0")))]
9813   "TARGET_F16FML"
9814   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9815   [(set_attr "type" "neon_fp_mul_s")]
9818 ;; pmull
;; Polynomial multiply long: 64x64 -> 128-bit carry-less multiply,
;; modelled as an opaque UNSPEC_PMULL since RTL has no polynomial
;; multiply.  Gated on TARGET_AES (PMULL on the 1q arrangement is part
;; of the Crypto/AES extension).
9820 (define_insn "aarch64_crypto_pmulldi"
9821   [(set (match_operand:TI 0 "register_operand" "=w")
9822         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
9823                      (match_operand:DI 2 "register_operand" "w")]
9824                     UNSPEC_PMULL))]
9825  "TARGET_AES"
9826  "pmull\\t%0.1q, %1.1d, %2.1d"
9827   [(set_attr "type" "crypto_pmull")]
;; PMULL2: same operation on the high 64-bit elements of V2DI inputs.
9830 (define_insn "aarch64_crypto_pmullv2di"
9831  [(set (match_operand:TI 0 "register_operand" "=w")
9832        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
9833                    (match_operand:V2DI 2 "register_operand" "w")]
9834                   UNSPEC_PMULL2))]
9835   "TARGET_AES"
9836   "pmull2\\t%0.1q, %1.2d, %2.2d"
9837   [(set_attr "type" "crypto_pmull")]
9840 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
;; Emits [su]xtl, but for ZERO_EXTEND may later be split (when
;; aarch64_split_simd_shift_p says so) into a ZIP1 with a shareable zero
;; vector, which is cheaper when the zero's cost is amortized.
9841 (define_insn_and_split "<optab><Vnarrowq><mode>2"
9842   [(set (match_operand:VQN 0 "register_operand" "=w")
9843         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9844   "TARGET_SIMD"
9845   "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
9846   "&& <CODE> == ZERO_EXTEND
9847    && aarch64_split_simd_shift_p (insn)"
9848   [(const_int 0)]
9849   {
9850     /* On many cores, it is cheaper to implement UXTL using a ZIP1 with zero,
9851        provided that the cost of the zero can be amortized over several
9852        operations.  We'll later recombine the zero and zip if there are
9853        not sufficient uses of the zero to make the split worthwhile.  */
9854     rtx res = simplify_gen_subreg (<VNARROWQ2>mode, operands[0],
9855                                    <MODE>mode, 0);
9856     rtx zero = aarch64_gen_shareable_zero (<VNARROWQ>mode);
9857     emit_insn (gen_aarch64_zip1<Vnarrowq2>_low (res, operands[1], zero));
9858     DONE;
9859   }
9860   [(set_attr "type" "neon_shift_imm_long")]
;; Intrinsic-facing expander for [su]xtl; simply maps onto the
;; extend pattern above (empty preparation body).
9863 (define_expand "aarch64_<su>xtl<mode>"
9864   [(set (match_operand:VQN 0 "register_operand" "=w")
9865         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9866   "TARGET_SIMD"
9867   ""
9870 ;; Truncate a 128-bit integer vector to a 64-bit vector.
9871 (define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
9872   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
9873         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
9874   "TARGET_SIMD"
9875   "xtn\t%0.<Vntype>, %1.<Vtype>"
9876   [(set_attr "type" "neon_move_narrow_q")]
9879 ;; Expander for the intrinsics that only takes one mode unlike the two-mode
9880 ;; trunc optab.
9881 (define_expand "aarch64_xtn<mode>"
9882   [(set (match_operand:<VNARROWQ> 0 "register_operand")
9883        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
9884   "TARGET_SIMD"
9885   {}
;; BFDOT: bfloat16 dot product accumulated into an fp32 vector.  The dot
;; product of operands 2 and 3 is an opaque UNSPEC_BFDOT added to the
;; accumulator (operand 1, tied to the output).
9888 (define_insn "aarch64_bfdot<mode>"
9889   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9890         (plus:VDQSF
9891           (unspec:VDQSF
9892            [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
9893             (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
9894             UNSPEC_BFDOT)
9895           (match_operand:VDQSF 1 "register_operand" "0")))]
9896   "TARGET_BF16_SIMD"
9897   "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
9898   [(set_attr "type" "neon_dot<q>")]
;; By-lane BFDOT: operand 4 is a lane index into operand 3 counted in
;; 2-element (2h) granules; the C body remaps it for endianness before
;; printing.
9901 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9902   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9903         (plus:VDQSF
9904           (unspec:VDQSF
9905            [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
9906             (match_operand:VBF 3 "register_operand" "w")
9907             (match_operand:SI 4 "const_int_operand" "n")]
9908             UNSPEC_BFDOT)
9909           (match_operand:VDQSF 1 "register_operand" "0")))]
9910   "TARGET_BF16_SIMD"
9912   int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9913   int lane = INTVAL (operands[4]);
9914   operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
9915   return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
9917   [(set_attr "type" "neon_dot<VDQSF:q>")]
9920 ;; bfmmla
;; BFMMLA: bfloat16 matrix multiply-accumulate into a V4SF accumulator
;; (operand 1, tied to the output); the multiply is an opaque unspec.
9921 (define_insn "aarch64_bfmmlaqv4sf"
9922   [(set (match_operand:V4SF 0 "register_operand" "=w")
9923         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9924                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9925                                  (match_operand:V8BF 3 "register_operand" "w")]
9926                     UNSPEC_BFMMLA)))]
9927   "TARGET_BF16_SIMD"
9928   "bfmmla\\t%0.4s, %2.8h, %3.8h"
9929   [(set_attr "type" "neon_fp_mla_s_q")]
9932 ;; bfmlal<bt>
;; BFMLALB/BFMLALT: widening bfloat16 multiply-accumulate of the bottom
;; or top elements (<bt> iterates over the BF_MLA unspec pair).
9933 (define_insn "aarch64_bfmlal<bt>v4sf"
9934   [(set (match_operand:V4SF 0 "register_operand" "=w")
9935         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9936                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9937                                   (match_operand:V8BF 3 "register_operand" "w")]
9938                      BF_MLA)))]
9939   "TARGET_BF16_SIMD"
9940   "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
9941   [(set_attr "type" "neon_fp_mla_s_q")]
;; By-lane BFMLALB/BFMLALT: operand 4 is the lane index into operand 3,
;; endian-adjusted in the C body; operand 3 uses "x" (V0-V15) as required
;; by the by-element encoding.
9944 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9945   [(set (match_operand:V4SF 0 "register_operand" "=w")
9946         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9947                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9948                                   (match_operand:VBF 3 "register_operand" "x")
9949                                   (match_operand:SI 4 "const_int_operand" "n")]
9950                      BF_MLA)))]
9951   "TARGET_BF16_SIMD"
9953   operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9954   return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
9956   [(set_attr "type" "neon_fp_mla_s_scalar_q")]
9959 ;; 8-bit integer matrix multiply-accumulate
;; SMMLA/UMMLA/USMMLA (MATMUL iterator, <sur> prefix): 8-bit integer
;; matrix multiply of operands 2 and 3 added to the V4SI accumulator
;; (operand 1, tied to the output).  Requires the I8MM extension.
9960 (define_insn "aarch64_simd_<sur>mmlav16qi"
9961   [(set (match_operand:V4SI 0 "register_operand" "=w")
9962         (plus:V4SI
9963          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
9964                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
9965          (match_operand:V4SI 1 "register_operand" "0")))]
9966   "TARGET_I8MM"
9967   "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
9968   [(set_attr "type" "neon_mla_s_q")]
9971 ;; bfcvtn
;; BFCVTN: narrow a V4SF vector to bfloat16 (low half of the result).
9972 (define_insn "aarch64_bfcvtn<q><mode>"
9973   [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
9974         (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
9975                             UNSPEC_BFCVTN))]
9976   "TARGET_BF16_SIMD"
9977   "bfcvtn\\t%0.4h, %1.4s"
9978   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; BFCVTN2: narrow into the high half, preserving the low half passed in
;; operand 1 (tied to the output).
9981 (define_insn "aarch64_bfcvtn2v8bf"
9982   [(set (match_operand:V8BF 0 "register_operand" "=w")
9983         (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
9984                       (match_operand:V4SF 2 "register_operand" "w")]
9985                       UNSPEC_BFCVTN2))]
9986   "TARGET_BF16_SIMD"
9987   "bfcvtn2\\t%0.8h, %2.4s"
9988   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; BFCVT: scalar SF -> BF conversion.
9991 (define_insn "aarch64_bfcvtbf"
9992   [(set (match_operand:BF 0 "register_operand" "=w")
9993         (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
9994                     UNSPEC_BFCVT))]
9995   "TARGET_BF16_FP"
9996   "bfcvt\\t%h0, %s1"
9997   [(set_attr "type" "f_cvt")]
10000 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
;; BF -> SF widening is exact: a bfloat16 value is the top 16 bits of the
;; corresponding float32, so a left shift by 16 bits reconstructs it.
10001 (define_insn "aarch64_vbfcvt<mode>"
10002   [(set (match_operand:V4SF 0 "register_operand" "=w")
10003         (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
10004                       UNSPEC_BFCVTN))]
10005   "TARGET_BF16_SIMD"
10006   "shll\\t%0.4s, %1.4h, #16"
10007   [(set_attr "type" "neon_shift_imm_long")]
;; High-half variant: widen the upper four BF elements via shll2.
10010 (define_insn "aarch64_vbfcvt_highv8bf"
10011   [(set (match_operand:V4SF 0 "register_operand" "=w")
10012         (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
10013                       UNSPEC_BFCVTN2))]
10014   "TARGET_BF16_SIMD"
10015   "shll2\\t%0.4s, %1.8h, #16"
10016   [(set_attr "type" "neon_shift_imm_long")]
;; Scalar BF -> SF widening via a 16-bit left shift of the D register.
10019 (define_insn "aarch64_bfcvtsf"
10020   [(set (match_operand:SF 0 "register_operand" "=w")
10021         (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
10022                     UNSPEC_BFCVT))]
10023   "TARGET_BF16_FP"
10024   "shl\\t%d0, %d1, #16"
10025   [(set_attr "type" "neon_shift_imm")]
10028 ;; faminmax
;; FAMIN/FAMAX intrinsic pattern: modelled as an opaque unspec pair
;; (FAMINMAX_UNS) over the two vector inputs.
10029 (define_insn "@aarch64_<faminmax_uns_op><mode>"
10030   [(set (match_operand:VHSDF 0 "register_operand" "=w")
10031         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
10032                        (match_operand:VHSDF 2 "register_operand" "w")]
10033                       FAMINMAX_UNS))]
10034   "TARGET_FAMINMAX"
10035   "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
;; Combiner pattern: smax/smin of two ABS operands fuses into a single
;; FAMAX/FAMIN instruction.
10038 (define_insn "*aarch64_faminmax_fused"
10039   [(set (match_operand:VHSDF 0 "register_operand" "=w")
10040         (FMAXMIN:VHSDF
10041           (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
10042           (abs:VHSDF (match_operand:VHSDF 2 "register_operand" "w"))))]
10043   "TARGET_FAMINMAX"
10044   "<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
;; LUTI (table lookup with 2/4-bit indices): operand 1 is the table,
;; operand 2 the index vector, operand 3 the index segment, and operand 4
;; the index width, checked against the table's element count in the
;; insn condition.
10047 (define_insn "@aarch64_lut<VLUT:mode><VB:mode>"
10048   [(set (match_operand:<VLUT:VCONQ> 0 "register_operand" "=w")
10049         (unspec:<VLUT:VCONQ>
10050          [(match_operand:VLUT 1 "register_operand" "w")
10051           (match_operand:VB 2 "register_operand" "w")
10052           (match_operand:SI 3 "const_int_operand")
10053           (match_operand:SI 4 "const_int_operand")]
10054          UNSPEC_LUTI))]
10055   "TARGET_LUT && INTVAL (operands[4]) <= exact_log2 (<VLUT:nunits>)"
10056   "luti%4\t%0<VLUT:Vconqtype>, {%1<VLUT:Vconqtype>}, %2[%3]"
10059 ;; lutx2
;; Two-register-table LUTI4: operand 1 is a register pair (%S1/%T1 print
;; its two halves); only the 4-bit index width is valid here.
10060 (define_insn "@aarch64_lut<VLUTx2:mode><VB:mode>"
10061   [(set (match_operand:<VSTRUCT_ELT> 0 "register_operand" "=w")
10062         (unspec:<VSTRUCT_ELT>
10063          [(match_operand:VLUTx2 1 "register_operand" "w")
10064           (match_operand:VB 2 "register_operand" "w")
10065           (match_operand:SI 3 "const_int_operand")
10066           (match_operand:SI 4 "const_int_operand")]
10067          UNSPEC_LUTI))]
10068   "TARGET_LUT && INTVAL (operands[4]) == 4"
10069   "luti%4\t%0.8h, {%S1.8h, %T1.8h}, %2[%3]"
10072 ;; fpm unary instructions (low part).
10073 (define_insn "@aarch64_<insn><mode>"
10074   [(set (match_operand:VQ_BHF 0 "register_operand" "=w")
10075         (unspec:VQ_BHF
10076          [(match_operand:V8QI 1 "register_operand" "w")
10077           (reg:DI FPM_REGNUM)]
10078         FPM_UNARY_UNS))]
10079   "TARGET_FP8"
10080   "<b><insn>\t%0.<Vtype>, %1.8b"
10083 ;; fpm unary instructions (high part).
;; Same conversion as the low-part pattern above, but the source is the
;; high 8 bytes of a 128-bit register, selected via vec_select with a
;; "hi half" parallel; emitted as the "2"-suffixed form of the mnemonic.
10084 (define_insn "@aarch64_<insn><mode>_high"
10085   [(set (match_operand:VQ_BHF 0 "register_operand" "=w")
10086         (unspec:VQ_BHF
10087          [(vec_select:V8QI
10088             (match_operand:V16QI 1 "register_operand" "w")
10089             (match_operand:V16QI 2 "vect_par_cnst_hi_half"))
10090           (reg:DI FPM_REGNUM)]
10091         FPM_UNARY_UNS))]
10092   "TARGET_FP8"
10093   "<b><insn>2\t%0.<Vtype>, %1.16b"
10096 ;; fpm binary instructions.
;; Narrowing FP8 conversion of two source vectors (operands 1 and 2)
;; into a packed byte vector <VPACKB>; reads FPM_REGNUM for the FP8
;; format configuration.
10097 (define_insn "@aarch64_<insn><mode>"
10098   [(set (match_operand:<VPACKB> 0 "register_operand" "=w")
10099         (unspec:<VPACKB>
10100          [(match_operand:VCVTFPM 1 "register_operand" "w")
10101           (match_operand:VCVTFPM 2 "register_operand" "w")
10102           (reg:DI FPM_REGNUM)]
10103          FPM_BINARY_UNS))]
10104   "TARGET_FP8"
10105   "<insn>\t%0.<VPACKBtype>, %1.<Vtype>, %2.<Vtype>"
10108 ;; fpm binary instructions & merge with low.
;; Little-endian variant: the 8-byte FP8 result is written into the
;; high half of operand 0 while the low half (operand 1, tied to
;; operand 0 via the "0" constraint) is preserved — hence the template
;; prints %1 as the destination.  vec_concat places the preserved half
;; first on little-endian; the big-endian twin below swaps the order.
10109 (define_insn "@aarch64_<insn><mode>_high_le"
10110   [(set (match_operand:V16QI 0 "register_operand" "=w")
10111         (vec_concat:V16QI
10112           (match_operand:V8QI 1 "register_operand" "0")
10113           (unspec:V8QI
10114             [(match_operand:V4SF_ONLY 2 "register_operand" "w")
10115              (match_operand:V4SF_ONLY 3 "register_operand" "w")
10116              (reg:DI FPM_REGNUM)]
10117             FPM_BINARY_UNS)))]
10118   "TARGET_FP8 && !BYTES_BIG_ENDIAN"
10119   "<insn>2\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
;; Big-endian variant of the merge-with-low pattern: identical assembly
;; output, but the vec_concat operand order is reversed (new FP8 result
;; first, preserved half second) to match big-endian lane numbering.
;; Operand 1 is tied to operand 0 ("0" constraint), so %1 names the
;; destination register.
10122 (define_insn "@aarch64_<insn><mode>_high_be"
10123   [(set (match_operand:V16QI 0 "register_operand" "=w")
10124         (vec_concat:V16QI
10125           (unspec:V8QI
10126             [(match_operand:V4SF_ONLY 2 "register_operand" "w")
10127              (match_operand:V4SF_ONLY 3 "register_operand" "w")
10128              (reg:DI FPM_REGNUM)]
10129             FPM_BINARY_UNS)
10130           (match_operand:V8QI 1 "register_operand" "0")))]
10131   "TARGET_FP8 && BYTES_BIG_ENDIAN"
10132   "<insn>2\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
10135 ;; fscale instructions
;; Operand 1 supplies the floating-point values and operand 2 (in the
;; corresponding <FCVT_TARGET> integer mode) the per-element scale
;; amounts.  Modeled as an unspec; requires TARGET_FP8.
10136 (define_insn "@aarch64_<insn><mode>"
10137   [(set (match_operand:VHSDF 0 "register_operand" "=w")
10138         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
10139                        (match_operand:<FCVT_TARGET> 2 "register_operand" "w")]
10140                       FSCALE_UNS))]
10141   "TARGET_FP8"
10142   "<insn>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10145 ;; fpm vdot instructions.  The target requirements are enforced by
10146 ;; VDQ_HSF_FDOT.
;; Dot product of two narrow-byte FP8 vectors (operands 2 and 3)
;; accumulated into operand 1, which is tied to operand 0 via the "0"
;; constraint — the template therefore prints %1 as the destination.
;; The insn condition is empty because the mode iterator itself carries
;; the target gating (see comment above).
10147 (define_insn "@aarch64_<insn><mode>"
10148   [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
10149         (unspec:VDQ_HSF_FDOT
10150          [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
10151           (match_operand:<VNARROWB> 2 "register_operand" "w")
10152           (match_operand:<VNARROWB> 3 "register_operand" "w")
10153           (reg:DI FPM_REGNUM)]
10154          FPM_FDOT))]
10155   ""
10156   "<insn>\t%1.<Vtype>, %2.<Vnbtype>, %3.<Vnbtype>"
;; Lane (indexed) form of the FP8 dot product above: operand 3 is the
;; vector holding the selected lane and operand 4 the lane number.
;; Operand 1 is the accumulator, tied to operand 0 ("0" constraint),
;; hence %1 as the printed destination.  Target gating again comes from
;; the VDQ_HSF_FDOT iterator, so the insn condition is empty.
10159 (define_insn "@aarch64_<insn>_lane<VDQ_HSF_FDOT:mode><VB:mode>"
10160   [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
10161         (unspec:VDQ_HSF_FDOT
10162          [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
10163           (match_operand:<VDQ_HSF_FDOT:VNARROWB> 2 "register_operand" "w")
10164           (match_operand:VB 3 "register_operand" "w")
10165           (match_operand 4 "const_int_operand")
10166           (reg:DI FPM_REGNUM)]
10167          FPM_FDOT_LANE))]
10168   ""
10169   "<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>, %3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]"
10172 ;; fpm fma instructions.
;; Widening FP8 multiply-accumulate into an 8 x HF accumulator:
;; operand 1 (tied to operand 0 via "0") accumulates the products of
;; the FP8 byte vectors in operands 2 and 3; FPM_REGNUM supplies the
;; FP8 format configuration.
10173 (define_insn "@aarch64_<insn><mode>"
10174   [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
10175         (unspec:V8HF_ONLY
10176          [(match_operand:V8HF_ONLY 1 "register_operand" "0")
10177           (match_operand:V16QI 2 "register_operand" "w")
10178           (match_operand:V16QI 3 "register_operand" "w")
10179           (reg:DI FPM_REGNUM)]
10180         FMLAL_FP8_HF))]
10181   "TARGET_FP8FMA"
10182   "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
;; Lane form of the FP8->HF multiply-accumulate: the second multiplicand
;; is a single byte of operand 3, selected by lane 4 and broadcast via
;; vec_duplicate.  The C output block remaps the lane number with
;; aarch64_endian_lane_rtx so the printed index matches architectural
;; (little-endian) lane numbering on both endiannesses.
10185 (define_insn "@aarch64_<insn>_lane<V8HF_ONLY:mode><VB:mode>"
10186   [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
10187         (unspec:V8HF_ONLY
10188          [(match_operand:V8HF_ONLY 1 "register_operand" "0")
10189           (match_operand:V16QI 2 "register_operand" "w")
10190           (vec_duplicate:V16QI
10191             (vec_select:QI
10192               (match_operand:VB 3 "register_operand" "w")
10193               (parallel [(match_operand:SI 4 "immediate_operand")])))
10194           (reg:DI FPM_REGNUM)]
10195         FMLAL_FP8_HF))]
10196   "TARGET_FP8FMA"
10197   {
10198     operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
10199                                            INTVAL (operands[4]));
10200     return "<insn>\t%0.<V8HF_ONLY:Vtype>, %2.16b, %3.b[%4]";
10201   }
;; Widening FP8 multiply-accumulate into a 4 x SF accumulator
;; (FMLALL family): structurally identical to the HF pattern above but
;; with a V4SF destination/accumulator (operand 1, tied to operand 0).
10204 (define_insn "@aarch64_<insn><mode>"
10205   [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
10206         (unspec:V4SF_ONLY
10207          [(match_operand:V4SF_ONLY 1 "register_operand" "0")
10208           (match_operand:V16QI 2 "register_operand" "w")
10209           (match_operand:V16QI 3 "register_operand" "w")
10210           (reg:DI FPM_REGNUM)]
10211         FMLALL_FP8_SF))]
10212   "TARGET_FP8FMA"
10213   "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
;; Lane form of the FP8->SF multiply-accumulate: one byte of operand 3
;; (lane 4) is broadcast as the second multiplicand.  As in the HF lane
;; pattern, the lane index is remapped with aarch64_endian_lane_rtx in
;; the C output block before printing.
10216 (define_insn "@aarch64_<insn>_lane<V4SF_ONLY:mode><VB:mode>"
10217   [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
10218         (unspec:V4SF_ONLY
10219          [(match_operand:V4SF_ONLY 1 "register_operand" "0")
10220           (match_operand:V16QI 2 "register_operand" "w")
10221           (vec_duplicate:V16QI
10222             (vec_select:QI
10223               (match_operand:VB 3 "register_operand" "w")
10224               (parallel [(match_operand:SI 4 "immediate_operand")])))
10225           (reg:DI FPM_REGNUM)]
10226         FMLALL_FP8_SF))]
10227   "TARGET_FP8FMA"
10228   {
10229     operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
10230                                            INTVAL (operands[4]));
10231     return "<insn>\t%0.<V4SF_ONLY:Vtype>, %2.16b, %3.b[%4]";
10232   }