gcc/config/aarch64/aarch64-simd.md

   1 ;; Machine description for AArch64 AdvSIMD architecture.
   2 ;; Copyright (C) 2011-2025 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; The following define_subst rules are used to produce patterns representing
  22 ;; the implicit zeroing effect of 64-bit Advanced SIMD operations, in effect
  23 ;; a vec_concat with zeroes.  The order of the vec_concat operands differs
  24 ;; for big-endian so we have a separate define_subst rule for each endianness.
  25 (define_subst "add_vec_concat_subst_le"
  26   [(set (match_operand:VDZ 0)
  27         (match_operand:VDZ 1))]
  28   "!BYTES_BIG_ENDIAN"
  29   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
  30         (vec_concat:<VDBL>
  31          (match_dup 1)
  32          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")))])
  33
  34 (define_subst "add_vec_concat_subst_be"
  35   [(set (match_operand:VDZ 0)
  36         (match_operand:VDZ 1))]
  37   "BYTES_BIG_ENDIAN"
  38   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
  39         (vec_concat:<VDBL>
  40          (match_operand:VDZ 2 "aarch64_simd_or_scalar_imm_zero")
  41          (match_dup 1)))])
  42
  43 ;; The subst_attr definitions used to annotate patterns further in the file.
  44 ;; Patterns that need to have the above substitutions added to them should
  45 ;; have <vczle><vczbe> added to their name.
  46 (define_subst_attr "vczle" "add_vec_concat_subst_le" "" "_vec_concatz_le")
  47 (define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
  48
  49 (define_expand "mov<mode>"
  50   [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
  51         (match_operand:VALL_F16 1 "general_operand"))]
  52   "TARGET_FLOAT"
  53   "
  54   /* Force the operand into a register if it is not an
  55      immediate whose use can be replaced with xzr.
  56      If the mode is 16 bytes wide, then we will be doing
  57      a stp in DI mode, so we check the validity of that.
  58      If the mode is 8 bytes wide, then we will do doing a
  59      normal str, so the check need not apply.  */
  60   if (GET_CODE (operands[0]) == MEM
  61       && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
  62            && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
  63                 && aarch64_mem_pair_operand (operands[0], DImode))
  64                || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
  65       operands[1] = force_reg (<MODE>mode, operands[1]);
  66
  67   /* If a constant is too complex to force to memory (e.g. because it
  68      contains CONST_POLY_INTs), build it up from individual elements instead.
  69      We should only need to do this before RA; aarch64_legitimate_constant_p
  70      should ensure that we don't try to rematerialize the constant later.  */
  71   if (GET_CODE (operands[1]) == CONST_VECTOR
  72       && targetm.cannot_force_const_mem (<MODE>mode, operands[1]))
  73     {
  74       aarch64_expand_vector_init (operands[0], operands[1]);
  75       DONE;
  76     }
  77   "
  78 )
  79
  80 (define_expand "movmisalign<mode>"
  81   [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
  82         (match_operand:VALL_F16 1 "general_operand"))]
  83   "TARGET_FLOAT && !STRICT_ALIGNMENT"
  84 {
  85   /* This pattern is not permitted to fail during expansion: if both arguments
  86      are non-registers (e.g. memory := constant, which can be created by the
  87      auto-vectorizer), force operand 1 into a register.  */
  88   if (!register_operand (operands[0], <MODE>mode)
  89       && !register_operand (operands[1], <MODE>mode))
  90     operands[1] = force_reg (<MODE>mode, operands[1]);
  91 })
  92
  93 (define_insn "aarch64_simd_dup<mode>"
  94   [(set (match_operand:VDQ_I 0 "register_operand")
  95         (vec_duplicate:VDQ_I
  96           (match_operand:<VEL> 1 "register_operand")))]
  97   "TARGET_SIMD"
  98   {@ [ cons: =0 , 1  ; attrs: type      ]
  99      [ w        , w  ; neon_dup<q>      ] dup\t%0.<Vtype>, %1.<Vetype>[0]
 100      [ w        , ?r ; neon_from_gp<q>  ] dup\t%0.<Vtype>, %<vwcore>1
 101   }
 102 )
 103
 104 (define_insn "aarch64_simd_dup<mode>"
 105   [(set (match_operand:VDQF_F16 0 "register_operand")
 106         (vec_duplicate:VDQF_F16
 107           (match_operand:<VEL> 1 "register_operand")))]
 108   "TARGET_SIMD"
 109   {@ [ cons: =0 , 1 ; attrs: type      ]
 110      [ w        , w ; neon_dup<q>      ] dup\t%0.<Vtype>, %1.<Vetype>[0]
 111      [ w        , r ; neon_from_gp<q>  ] dup\t%0.<Vtype>, %<vwcore>1
 112   }
 113 )
 114
 115 (define_insn "@aarch64_dup_lane<mode>"
 116   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
 117         (vec_duplicate:VALL_F16
 118           (vec_select:<VEL>
 119             (match_operand:VALL_F16 1 "register_operand" "w")
 120             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
 121           )))]
 122   "TARGET_SIMD"
 123   {
 124     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
 125     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 126   }
 127   [(set_attr "type" "neon_dup<q>")]
 128 )
 129
 130 (define_insn "@aarch64_dup_lane_<vswap_width_name><mode>"
 131   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
 132         (vec_duplicate:VALL_F16_NO_V2Q
 133           (vec_select:<VEL>
 134             (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
 135             (parallel [(match_operand:SI 2 "immediate_operand" "i")])
 136           )))]
 137   "TARGET_SIMD"
 138   {
 139     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
 140     return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
 141   }
 142   [(set_attr "type" "neon_dup<q>")]
 143 )
 144
 145 (define_insn_and_split "*aarch64_simd_mov<VDMOV:mode>"
 146   [(set (match_operand:VDMOV 0 "nonimmediate_operand")
 147         (match_operand:VDMOV 1 "general_operand"))]
 148   "TARGET_FLOAT
 149    && (register_operand (operands[0], <MODE>mode)
 150        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 151   {@ [cons: =0, 1; attrs: type, arch, length]
 152      [w , m ; neon_load1_1reg<q> , *        , *] ldr\t%d0, %1
 153      [r , m ; load_8             , *        , *] ldr\t%x0, %1
 154      [m , Dz; store_8            , *        , *] str\txzr, %0
 155      [m , w ; neon_store1_1reg<q>, *        , *] str\t%d1, %0
 156      [m , r ; store_8            , *        , *] str\t%x1, %0
 157      [w , w ; neon_logic<q>      , simd     , *] mov\t%0.<Vbtype>, %1.<Vbtype>
 158      [w , w ; neon_logic<q>      , *        , *] fmov\t%d0, %d1
 159      [?r, w ; neon_to_gp<q>      , base_simd, *] umov\t%0, %1.d[0]
 160      [?r, w ; neon_to_gp<q>      , *        , *] fmov\t%x0, %d1
 161      [?w, r ; f_mcr              , *        , *] fmov\t%d0, %1
 162      [?r, r ; mov_reg            , *        , *] mov\t%0, %1
 163      [w , Dn; neon_move<q>       , simd     , *] << aarch64_output_simd_mov_imm (operands[1], 64);
 164      [w , Dz; f_mcr              , *        , *] fmov\t%d0, xzr
 165      [w , Dx; neon_move          , simd     , 8] #
 166   }
 167   "CONST_INT_P (operands[1])
 168    && aarch64_simd_special_constant_p (operands[1], <MODE>mode)
 169    && FP_REGNUM_P (REGNO (operands[0]))"
 170   [(const_int 0)]
 171   {
 172     aarch64_maybe_generate_simd_constant (operands[0], operands[1], <MODE>mode);
 173     DONE;
 174   }
 175 )
 176
 177 (define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
 178   [(set (match_operand:VQMOV 0 "nonimmediate_operand")
 179         (match_operand:VQMOV 1 "general_operand"))]
 180   "TARGET_FLOAT
 181    && (register_operand (operands[0], <MODE>mode)
 182        || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
 183   {@ [cons: =0, 1; attrs: type, arch, length]
 184      [w  , m ; neon_load1_1reg<q> , *   , 4] ldr\t%q0, %1
 185      [Umn, Dz; store_16           , *   , 4] stp\txzr, xzr, %0
 186      [m  , w ; neon_store1_1reg<q>, *   , 4] str\t%q1, %0
 187      [w  , w ; neon_logic<q>      , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
 188      [w  , w ; *                  , sve , 4] mov\t%Z0.d, %Z1.d
 189      [?r , w ; multiple           , *   , 8] #
 190      [?w , r ; multiple           , *   , 8] #
 191      [?r , r ; multiple           , *   , 8] #
 192      [w  , Dn; neon_move<q>       , simd, 4] << aarch64_output_simd_mov_imm (operands[1], 128);
 193      [w  , Dz; fmov               , *   , 4] fmov\t%d0, xzr
 194      [w  , Dx; neon_move          , simd, 8] #
 195   }
 196   "&& reload_completed
 197    && ((REG_P (operands[0])
 198         && REG_P (operands[1])
 199         && !(FP_REGNUM_P (REGNO (operands[0]))
 200              && FP_REGNUM_P (REGNO (operands[1]))))
 201        || (aarch64_simd_special_constant_p (operands[1], <MODE>mode)
 202            && FP_REGNUM_P (REGNO (operands[0]))))"
 203   [(const_int 0)]
 204   {
 205     if (GP_REGNUM_P (REGNO (operands[0]))
 206         && GP_REGNUM_P (REGNO (operands[1])))
 207       aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
 208     else
 209       {
 210         if (FP_REGNUM_P (REGNO (operands[0]))
 211             && aarch64_maybe_generate_simd_constant (operands[0], operands[1],
 212                                                      <MODE>mode))
 213           ;
 214         else
 215           aarch64_split_simd_move (operands[0], operands[1]);
 216       }
 217     DONE;
 218   }
 219 )
 220
 221 ;; When storing lane zero we can use the normal STR and its more permissive
 222 ;; addressing modes.
 223
 224 (define_insn "aarch64_store_lane0<mode>"
 225   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
 226         (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
 227                         (parallel [(match_operand 2 "const_int_operand" "n")])))]
 228   "TARGET_FLOAT
 229    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
 230   "str\\t%<Vetype>1, %0"
 231   [(set_attr "type" "neon_store1_1reg<q>")]
 232 )
 233
 234 (define_insn "aarch64_simd_stp<mode>"
 235   [(set (match_operand:VP_2E 0 "aarch64_mem_pair_lanes_operand")
 236         (vec_duplicate:VP_2E (match_operand:<VEL> 1 "register_operand")))]
 237   "TARGET_SIMD"
 238   {@ [ cons: =0 , 1 ; attrs: type            ]
 239      [ Umn      , w ; neon_stp               ] stp\t%<Vetype>1, %<Vetype>1, %y0
 240      [ Umn      , r ; store_<ldpstp_vel_sz>  ] stp\t%<vwcore>1, %<vwcore>1, %y0
 241   }
 242 )
 243
 244 (define_expand "@aarch64_split_simd_mov<mode>"
 245   [(set (match_operand:VQMOV 0)
 246         (match_operand:VQMOV 1))]
 247   "TARGET_FLOAT"
 248   {
 249     rtx dst = operands[0];
 250     rtx src = operands[1];
 251
 252     if (GP_REGNUM_P (REGNO (src)))
 253       {
 254         rtx src_low_part = gen_lowpart (<VHALF>mode, src);
 255         rtx src_high_part = gen_highpart (<VHALF>mode, src);
 256         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 257
 258         emit_move_insn (dst_low_part, src_low_part);
 259         emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part,
 260                                                src_high_part));
 261       }
 262     else
 263       {
 264         rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
 265         rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
 266         rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 267         rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 268         emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo));
 269         emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi));
 270       }
 271     DONE;
 272   }
 273 )
 274
 275 (define_expand "aarch64_get_half<mode>"
 276   [(set (match_operand:<VHALF> 0 "register_operand")
 277         (vec_select:<VHALF>
 278           (match_operand:VQMOV 1 "register_operand")
 279           (match_operand 2 "ascending_int_parallel")))]
 280   "TARGET_FLOAT"
 281   {
 282     if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
 283       {
 284         emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
 285         DONE;
 286       }
 287   }
 288 )
 289
 290 (define_insn_and_split "aarch64_simd_mov_from_<mode>low"
 291   [(set (match_operand:<VHALF> 0 "register_operand")
 292         (vec_select:<VHALF>
 293           (match_operand:VQMOV_NO2E 1 "register_operand")
 294           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half")))]
 295   "TARGET_FLOAT"
 296   {@ [ cons: =0 , 1 ; attrs: type   , arch      ]
 297      [ w        , w ; mov_reg       , simd      ] #
 298      [ ?r       , w ; neon_to_gp<q> , base_simd ] umov\t%0, %1.d[0]
 299      [ ?r       , w ; f_mrc         , *         ] fmov\t%0, %d1
 300   }
 301   "&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
 302   [(set (match_dup 0) (match_dup 1))]
 303   {
 304     operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
 305   }
 306   [(set_attr "length" "4")]
 307 )
 308
 309 (define_insn "aarch64_simd_mov_from_<mode>high"
 310   [(set (match_operand:<VHALF> 0 "register_operand")
 311         (vec_select:<VHALF>
 312           (match_operand:VQMOV_NO2E 1 "register_operand")
 313           (match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half")))]
 314   "TARGET_FLOAT"
 315   {@ [ cons: =0 , 1 ; attrs: type   , arch  ]
 316      [ w        , w ; neon_dup<q>   , simd  ] dup\t%d0, %1.d[1]
 317      [ w        , w ; *             , sve   ] ext\t%Z0.b, %Z0.b, %Z0.b, #8
 318      [ ?r       , w ; neon_to_gp<q> , simd  ] umov\t%0, %1.d[1]
 319      [ ?r       , w ; f_mrc         , *     ] fmov\t%0, %1.d[1]
 320   }
 321   [(set_attr "length" "4")]
 322 )
 323
 324 (define_insn "iorn<mode>3<vczle><vczbe>"
 325  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 326        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 "register_operand" "w"))
 327                 (match_operand:VDQ_I 1 "register_operand" "w")))]
 328  "TARGET_SIMD"
 329  "orn\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 330   [(set_attr "type" "neon_logic<q>")]
 331 )
 332
 333 (define_insn "andn<mode>3<vczle><vczbe>"
 334  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 335        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 2 "register_operand" "w"))
 336                 (match_operand:VDQ_I 1 "register_operand" "w")))]
 337  "TARGET_SIMD"
 338  "bic\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
 339   [(set_attr "type" "neon_logic<q>")]
 340 )
 341
 342 (define_insn "add<mode>3<vczle><vczbe>"
 343   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 344         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 345                   (match_operand:VDQ_I 2 "register_operand" "w")))]
 346   "TARGET_SIMD"
 347   "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 348   [(set_attr "type" "neon_add<q>")]
 349 )
 350
 351 (define_insn "sub<mode>3<vczle><vczbe>"
 352   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 353         (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
 354                    (match_operand:VDQ_I 2 "register_operand" "w")))]
 355   "TARGET_SIMD"
 356   "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 357   [(set_attr "type" "neon_sub<q>")]
 358 )
 359
 360 (define_insn "mul<mode>3<vczle><vczbe>"
 361   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 362         (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
 363                    (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
 364   "TARGET_SIMD"
 365   "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 366   [(set_attr "type" "neon_mul_<Vetype><q>")]
 367 )
 368
 369 (define_insn "bswap<mode>2"
 370   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
 371         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
 372   "TARGET_SIMD"
 373   "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
 374   [(set_attr "type" "neon_rev<q>")]
 375 )
 376
 377 (define_insn "aarch64_rbit<mode><vczle><vczbe>"
 378   [(set (match_operand:VB 0 "register_operand" "=w")
 379         (bitreverse:VB (match_operand:VB 1 "register_operand" "w")))]
 380   "TARGET_SIMD"
 381   "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
 382   [(set_attr "type" "neon_rbit")]
 383 )
 384
 385 (define_expand "ctz<mode>2"
 386   [(set (match_operand:VS 0 "register_operand")
 387         (ctz:VS (match_operand:VS 1 "register_operand")))]
 388   "TARGET_SIMD"
 389   {
 390      emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
 391      rtx op0_castsi2qi = force_subreg (<VS:VSI2QI>mode, operands[0],
 392                                        <MODE>mode, 0);
 393      emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
 394      emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
 395      DONE;
 396   }
 397 )
 398
 399 (define_expand "@xorsign<mode>3"
 400   [(match_operand:VHSDF 0 "register_operand")
 401    (match_operand:VHSDF 1 "register_operand")
 402    (match_operand:VHSDF 2 "register_operand")]
 403   "TARGET_SIMD"
 404 {
 405
 406   machine_mode imode = <V_INT_EQUIV>mode;
 407   rtx v_bitmask = gen_reg_rtx (imode);
 408   rtx op1x = gen_reg_rtx (imode);
 409   rtx op2x = gen_reg_rtx (imode);
 410
 411   rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
 412   rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
 413
 414   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 415
 416   emit_move_insn (v_bitmask,
 417                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 418                                                      HOST_WIDE_INT_M1U << bits));
 419
 420   emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
 421   emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
 422   emit_move_insn (operands[0],
 423                   lowpart_subreg (<MODE>mode, op1x, imode));
 424   DONE;
 425 }
 426 )
 427
 428 ;; The fcadd and fcmla patterns are made UNSPEC for the explicitly due to the
 429 ;; fact that their usage need to guarantee that the source vectors are
 430 ;; contiguous.  It would be wrong to describe the operation without being able
 431 ;; to describe the permute that is also required, but even if that is done
 432 ;; the permute would have been created as a LOAD_LANES which means the values
 433 ;; in the registers are in the wrong order.
 434 (define_insn "aarch64_fcadd<rot><mode><vczle><vczbe>"
 435   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 436         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
 437                        (match_operand:VHSDF 2 "register_operand" "w")]
 438                        FCADD))]
 439   "TARGET_COMPLEX"
 440   "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
 441   [(set_attr "type" "neon_fcadd")]
 442 )
 443
 444 (define_expand "cadd<rot><mode>3"
 445   [(set (match_operand:VHSDF 0 "register_operand")
 446         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
 447                        (match_operand:VHSDF 2 "register_operand")]
 448                        FCADD))]
 449   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 450 )
 451
 452 (define_insn "aarch64_fcmla<rot><mode><vczle><vczbe>"
 453   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 454         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 455                                    (match_operand:VHSDF 3 "register_operand" "w")]
 456                                    FCMLA)
 457                     (match_operand:VHSDF 1 "register_operand" "0")))]
 458   "TARGET_COMPLEX"
 459   "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
 460   [(set_attr "type" "neon_fcmla")]
 461 )
 462
 463
 464 (define_insn "aarch64_fcmla_lane<rot><mode><vczle><vczbe>"
 465   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 466         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
 467                                    (match_operand:VHSDF 3 "register_operand" "w")
 468                                    (match_operand:SI 4 "const_int_operand" "n")]
 469                                    FCMLA)
 470                     (match_operand:VHSDF 1 "register_operand" "0")))]
 471   "TARGET_COMPLEX"
 472 {
 473   operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
 474   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 475 }
 476   [(set_attr "type" "neon_fcmla")]
 477 )
 478
 479 (define_insn "aarch64_fcmla_laneq<rot>v4hf<vczle><vczbe>"
 480   [(set (match_operand:V4HF 0 "register_operand" "=w")
 481         (plus:V4HF (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
 482                                  (match_operand:V8HF 3 "register_operand" "w")
 483                                  (match_operand:SI 4 "const_int_operand" "n")]
 484                                  FCMLA)
 485                    (match_operand:V4HF 1 "register_operand" "0")))]
 486   "TARGET_COMPLEX"
 487 {
 488   operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
 489   return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
 490 }
 491   [(set_attr "type" "neon_fcmla")]
 492 )
 493
 494 (define_insn "aarch64_fcmlaq_lane<rot><mode>"
 495   [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
 496         (plus:VQ_HSF (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
 497                                      (match_operand:<VHALF> 3 "register_operand" "w")
 498                                      (match_operand:SI 4 "const_int_operand" "n")]
 499                                      FCMLA)
 500                      (match_operand:VQ_HSF 1 "register_operand" "0")))]
 501   "TARGET_COMPLEX"
 502 {
 503   int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
 504   operands[4]
 505     = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
 506   return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
 507 }
 508   [(set_attr "type" "neon_fcmla")]
 509 )
 510
 511 ;; The complex mla/mls operations always need to expand to two instructions.
 512 ;; The first operation does half the computation and the second does the
 513 ;; remainder.  Because of this, expand early.
 514 (define_expand "cml<fcmac1><conj_op><mode>4"
 515   [(set (match_operand:VHSDF 0 "register_operand")
 516         (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
 517                                    (match_operand:VHSDF 2 "register_operand")]
 518                                    FCMLA_OP)
 519                     (match_operand:VHSDF 3 "register_operand")))]
 520   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 521 {
 522   rtx tmp = gen_reg_rtx (<MODE>mode);
 523   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3],
 524                                                  operands[2], operands[1]));
 525   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp,
 526                                                  operands[2], operands[1]));
 527   DONE;
 528 })
 529
 530 ;; The complex mul operations always need to expand to two instructions.
 531 ;; The first operation does half the computation and the second does the
 532 ;; remainder.  Because of this, expand early.
 533 (define_expand "cmul<conj_op><mode>3"
 534   [(set (match_operand:VHSDF 0 "register_operand")
 535         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")
 536                        (match_operand:VHSDF 2 "register_operand")]
 537                        FCMUL_OP))]
 538   "TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
 539 {
 540   rtx tmp = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
 541   rtx res1 = gen_reg_rtx (<MODE>mode);
 542   emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (res1, tmp,
 543                                                  operands[2], operands[1]));
 544   emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], res1,
 545                                                  operands[2], operands[1]));
 546   DONE;
 547 })
 548
 549 ;; These expands map to the Dot Product optab the vectorizer checks for
 550 ;; and to the intrinsics patttern.
 551 ;; The auto-vectorizer expects a dot product builtin that also does an
 552 ;; accumulation into the provided register.
 553 ;; Given the following pattern
 554 ;;
 555 ;; for (i=0; i<len; i++) {
 556 ;;     c = a[i] * b[i];
 557 ;;     r += c;
 558 ;; }
 559 ;; return result;
 560 ;;
 561 ;; This can be auto-vectorized to
 562 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
 563 ;;
 564 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
 565 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
 566 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
 567 ;; ...
 568 ;;
 569 ;; and so the vectorizer provides r, in which the result has to be accumulated.
 570 (define_insn "<sur>dot_prod<mode><vsi2qi><vczle><vczbe>"
 571   [(set (match_operand:VS 0 "register_operand" "=w")
 572         (plus:VS
 573           (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
 574                       (match_operand:<VSI2QI> 2 "register_operand" "w")]
 575                       DOTPROD)
 576           (match_operand:VS 3 "register_operand" "0")))]
 577   "TARGET_DOTPROD"
 578   "<sur>dot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
 579   [(set_attr "type" "neon_dot<q>")]
 580 )
 581
 582 ;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
 583 ;; (vector) Dot Product operation and the vectorized optab.
 584 (define_insn "usdot_prod<mode><vsi2qi><vczle><vczbe>"
 585   [(set (match_operand:VS 0 "register_operand" "=w")
 586         (plus:VS
 587           (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
 588                       (match_operand:<VSI2QI> 2 "register_operand" "w")]
 589           UNSPEC_USDOT)
 590           (match_operand:VS 3 "register_operand" "0")))]
 591   "TARGET_I8MM"
 592   "usdot\\t%0.<Vtype>, %1.<Vdottype>, %2.<Vdottype>"
 593   [(set_attr "type" "neon_dot<q>")]
 594 )
 595
 596 ;; These instructions map to the __builtins for the Dot Product
 597 ;; indexed operations.
 598 (define_insn "aarch64_<sur>dot_lane<vsi2qi><vczle><vczbe>"
 599   [(set (match_operand:VS 0 "register_operand" "=w")
 600         (plus:VS
 601           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 602                       (match_operand:V8QI 3 "register_operand" "<h_con>")
 603                       (match_operand:SI 4 "immediate_operand" "i")]
 604                       DOTPROD)
 605           (match_operand:VS 1 "register_operand" "0")))]
 606   "TARGET_DOTPROD"
 607   {
 608     operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
 609     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 610   }
 611   [(set_attr "type" "neon_dot<q>")]
 612 )
 613
 614 (define_insn "aarch64_<sur>dot_laneq<vsi2qi><vczle><vczbe>"
 615   [(set (match_operand:VS 0 "register_operand" "=w")
 616         (plus:VS
 617           (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
 618                       (match_operand:V16QI 3 "register_operand" "<h_con>")
 619                       (match_operand:SI 4 "immediate_operand" "i")]
 620                       DOTPROD)
 621           (match_operand:VS 1 "register_operand" "0")))]
 622   "TARGET_DOTPROD"
 623   {
 624     operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
 625     return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
 626   }
 627   [(set_attr "type" "neon_dot<q>")]
 628 )
 629
 630 ;; These instructions map to the __builtins for the armv8.6a I8MM usdot, sudot
 631 ;; (by element) Dot Product operations.
 632 (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><vczbe>"
 633   [(set (match_operand:VS 0 "register_operand" "=w")
 634         (plus:VS
 635           (unspec:VS [(match_operand:<VS:VSI2QI> 2 "register_operand" "w")
 636                       (match_operand:VB 3 "register_operand" "w")
 637                       (match_operand:SI 4 "immediate_operand" "i")]
 638           DOTPROD_I8MM)
 639           (match_operand:VS 1 "register_operand" "0")))]
 640   "TARGET_I8MM"
 641   {
 642     int nunits = GET_MODE_NUNITS (<VB:MODE>mode).to_constant ();
 643     int lane = INTVAL (operands[4]);
 644     operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane), SImode);
 645     return "<DOTPROD_I8MM:sur>dot\\t%0.<VS:Vtype>, %2.<VS:Vdottype>, %3.4b[%4]";
 646   }
 647   [(set_attr "type" "neon_dot<VS:q>")]
 648 )
 649
 650 (define_expand "copysign<mode>3"
 651   [(match_operand:VHSDF 0 "register_operand")
 652    (match_operand:VHSDF 1 "register_operand")
 653    (match_operand:VHSDF 2 "nonmemory_operand")]
 654   "TARGET_SIMD"
 655 {
 656   machine_mode int_mode = <V_INT_EQUIV>mode;
 657   rtx v_bitmask = gen_reg_rtx (int_mode);
 658   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 659
 660   emit_move_insn (v_bitmask,
 661                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
 662                                                      HOST_WIDE_INT_M1U << bits));
 663
 664   /* copysign (x, -1) should instead be expanded as orr with the sign
 665      bit.  */
 666   if (!REG_P (operands[2]))
 667     {
 668       rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
 669       if (GET_CODE (op2_elt) == CONST_DOUBLE
 670           && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
 671         {
 672           emit_insn (gen_ior<v_int_equiv>3 (
 673             lowpart_subreg (int_mode, operands[0], <MODE>mode),
 674             lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
 675           DONE;
 676         }
 677     }
 678
 679   operands[2] = force_reg (<MODE>mode, operands[2]);
 680   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
 681                                          operands[2], operands[1]));
 682   DONE;
 683 }
 684 )
 685
 686 (define_insn "mul_lane<mode>3"
 687  [(set (match_operand:VMULD 0 "register_operand" "=w")
 688        (mult:VMULD
 689          (vec_duplicate:VMULD
 690            (vec_select:<VEL>
 691              (match_operand:<VCOND> 2 "register_operand" "<h_con>")
 692              (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
 693          (match_operand:VMULD 1 "register_operand" "w")))]
 694   "TARGET_SIMD"
 695   {
 696     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
 697     return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
 698   }
 699   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 700 )
 701
 702 (define_insn "mul_laneq<mode>3"
 703   [(set (match_operand:VMUL 0 "register_operand" "=w")
 704      (mult:VMUL
 705        (vec_duplicate:VMUL
 706           (vec_select:<VEL>
 707             (match_operand:<VCONQ> 2 "register_operand" "<h_con>")
 708             (parallel [(match_operand:SI 3 "immediate_operand")])))
 709       (match_operand:VMUL 1 "register_operand" "w")))]
 710   "TARGET_SIMD"
 711   {
 712     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
 713     return "<f>mul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
 714   }
 715   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 716 )
 717
 718 (define_insn "mul_n<mode>3"
 719  [(set (match_operand:VMUL 0 "register_operand" "=w")
 720        (mult:VMUL
 721          (vec_duplicate:VMUL
 722            (match_operand:<VEL> 2 "register_operand" "<h_con>"))
 723          (match_operand:VMUL 1 "register_operand" "w")))]
 724   "TARGET_SIMD"
 725   "<f>mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
 726   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
 727 )
 728
 729 (define_insn "@aarch64_rsqrte<mode>"
 730   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 731         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
 732                      UNSPEC_RSQRTE))]
 733   "TARGET_SIMD"
 734   "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 735   [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 736
 737 (define_insn "@aarch64_rsqrts<mode>"
 738   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
 739         (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
 740                             (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
 741          UNSPEC_RSQRTS))]
 742   "TARGET_SIMD"
 743   "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
 744   [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
 745
 746 (define_expand "rsqrt<mode>2"
 747   [(set (match_operand:VALLF 0 "register_operand")
 748         (unspec:VALLF [(match_operand:VALLF 1 "register_operand")]
 749                      UNSPEC_RSQRT))]
 750   "TARGET_SIMD"
 751 {
 752   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
 753   DONE;
 754 })
 755
 756 (define_insn "aarch64_ursqrte<mode>"
 757 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
 758       (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
 759                    UNSPEC_RSQRTE))]
 760 "TARGET_SIMD"
 761 "ursqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 762 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
 763
 764 (define_insn "*aarch64_mul3_elt_to_64v2df"
 765   [(set (match_operand:DF 0 "register_operand" "=w")
 766      (mult:DF
 767        (vec_select:DF
 768          (match_operand:V2DF 1 "register_operand" "w")
 769          (parallel [(match_operand:SI 2 "immediate_operand")]))
 770        (match_operand:DF 3 "register_operand" "w")))]
 771   "TARGET_SIMD"
 772   {
 773     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
 774     return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
 775   }
 776   [(set_attr "type" "neon_fp_mul_d_scalar_q")]
 777 )
 778
 779 (define_insn "neg<mode>2<vczle><vczbe>"
 780   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 781         (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 782   "TARGET_SIMD"
 783   "neg\t%0.<Vtype>, %1.<Vtype>"
 784   [(set_attr "type" "neon_neg<q>")]
 785 )
 786
 787 (define_insn "abs<mode>2<vczle><vczbe>"
 788   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
 789         (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
 790   "TARGET_SIMD"
 791   "abs\t%0.<Vtype>, %1.<Vtype>"
 792   [(set_attr "type" "neon_abs<q>")]
 793 )
 794
 795 ;; The intrinsic version of integer ABS must not be allowed to
 796 ;; combine with any operation with an integrated ABS step, such
 797 ;; as SABD.
 798 (define_insn "aarch64_abs<mode><vczle><vczbe>"
 799   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 800           (unspec:VSDQ_I_DI
 801             [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
 802            UNSPEC_ABS))]
 803   "TARGET_SIMD"
 804   "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
 805   [(set_attr "type" "neon_abs<q>")]
 806 )
 807
 808 ;; It's tempting to represent SABD as ABS (MINUS op1 op2).
 809 ;; This isn't accurate as ABS treats always its input as a signed value.
 810 ;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64.
 811 ;; Whereas SABD would return 192 (-64 signed) on the above example.
 812 ;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
 813 (define_insn "aarch64_<su>abd<mode><vczle><vczbe>"
 814   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
 815         (minus:VDQ_BHSI
 816           (USMAX:VDQ_BHSI
 817             (match_operand:VDQ_BHSI 1 "register_operand" "w")
 818             (match_operand:VDQ_BHSI 2 "register_operand" "w"))
 819           (<max_opp>:VDQ_BHSI
 820             (match_dup 1)
 821             (match_dup 2))))]
 822   "TARGET_SIMD"
 823   "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
 824   [(set_attr "type" "neon_abd<q>")]
 825 )
 826
 827 (define_expand "<su>abd<mode>3"
 828   [(match_operand:VDQ_BHSI 0 "register_operand")
 829    (USMAX:VDQ_BHSI
 830      (match_operand:VDQ_BHSI 1 "register_operand")
 831      (match_operand:VDQ_BHSI 2 "register_operand"))]
 832   "TARGET_SIMD"
 833   {
 834     emit_insn (gen_aarch64_<su>abd<mode> (operands[0], operands[1], operands[2]));
 835     DONE;
 836   }
 837 )
 838
 839 (define_insn "aarch64_<su>abdl<mode>"
 840   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 841         (zero_extend:<VWIDE>
 842           (minus:VD_BHSI
 843             (USMAX:VD_BHSI
 844               (match_operand:VD_BHSI 1 "register_operand" "w")
 845               (match_operand:VD_BHSI 2 "register_operand" "w"))
 846             (<max_opp>:VD_BHSI
 847               (match_dup 1)
 848               (match_dup 2)))))]
 849   "TARGET_SIMD"
 850   "<su>abdl\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 851   [(set_attr "type" "neon_abd<q>")]
 852 )
 853
 854 (define_insn "aarch64_<su>abdl2<mode>_insn"
 855   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 856         (zero_extend:<VDBLW>
 857           (minus:<VHALF>
 858             (USMAX:<VHALF>
 859               (vec_select:<VHALF>
 860                 (match_operand:VQW 1 "register_operand" "w")
 861                 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))
 862               (vec_select:<VHALF>
 863                 (match_operand:VQW 2 "register_operand" "w")
 864                 (match_dup 3)))
 865             (<max_opp>:<VHALF>
 866               (vec_select:<VHALF>
 867                 (match_dup 1)
 868                 (match_dup 3))
 869               (vec_select:<VHALF>
 870                 (match_dup 2)
 871                 (match_dup 3))))))]
 872
 873   "TARGET_SIMD"
 874   "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 875   [(set_attr "type" "neon_abd<q>")]
 876 )
 877
 878 (define_expand "aarch64_<su>abdl2<mode>"
 879   [(match_operand:<VDBLW> 0 "register_operand")
 880    (USMAX:VQW
 881      (match_operand:VQW 1 "register_operand")
 882      (match_operand:VQW 2 "register_operand"))]
 883   "TARGET_SIMD"
 884   {
 885     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 886     emit_insn (gen_aarch64_<su>abdl2<mode>_insn (operands[0], operands[1],
 887                                                  operands[2], hi));
 888     DONE;
 889   }
 890 )
 891
 892 (define_insn "aarch64_<su>abdl<mode>_hi_internal"
 893   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 894         (abs:<VWIDE>
 895           (minus:<VWIDE>
 896             (ANY_EXTEND:<VWIDE>
 897               (vec_select:<VHALF>
 898                 (match_operand:VQW 1 "register_operand" "w")
 899                 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
 900             (ANY_EXTEND:<VWIDE>
 901               (vec_select:<VHALF>
 902                 (match_operand:VQW 2 "register_operand" "w")
 903                 (match_dup 3))))))]
 904   "TARGET_SIMD"
 905   "<su>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
 906   [(set_attr "type" "neon_abd_long")]
 907 )
 908
 909 (define_insn "aarch64_<su>abdl<mode>_lo_internal"
 910   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 911         (abs:<VWIDE>
 912           (minus:<VWIDE>
 913             (ANY_EXTEND:<VWIDE>
 914               (vec_select:<VHALF>
 915                 (match_operand:VQW 1 "register_operand" "w")
 916                 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
 917             (ANY_EXTEND:<VWIDE>
 918               (vec_select:<VHALF>
 919                 (match_operand:VQW 2 "register_operand" "w")
 920                 (match_dup 3))))))]
 921   "TARGET_SIMD"
 922   "<su>abdl\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
 923   [(set_attr "type" "neon_abd_long")]
 924 )
 925
 926 (define_expand "vec_widen_<su>abd_hi_<mode>"
 927   [(match_operand:<VWIDE> 0 "register_operand")
 928    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
 929    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
 930   "TARGET_SIMD"
 931   {
 932     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
 933     emit_insn (gen_aarch64_<su>abdl<mode>_hi_internal (operands[0], operands[1],
 934                                                        operands[2], p));
 935     DONE;
 936   }
 937 )
 938
 939 (define_expand "vec_widen_<su>abd_lo_<mode>"
 940   [(match_operand:<VWIDE> 0 "register_operand")
 941    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
 942    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
 943   "TARGET_SIMD"
 944   {
 945     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
 946     emit_insn (gen_aarch64_<su>abdl<mode>_lo_internal (operands[0], operands[1],
 947                                                        operands[2], p));
 948     DONE;
 949   }
 950 )
 951
 952 (define_insn "aarch64_<su>abal<mode>"
 953   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
 954         (plus:<VWIDE>
 955           (zero_extend:<VWIDE>
 956             (minus:VD_BHSI
 957               (USMAX:VD_BHSI
 958                 (match_operand:VD_BHSI 2 "register_operand" "w")
 959                 (match_operand:VD_BHSI 3 "register_operand" "w"))
 960               (<max_opp>:VD_BHSI
 961                 (match_dup 2)
 962                 (match_dup 3))))
 963           (match_operand:<VWIDE> 1 "register_operand" "0")))]
 964   "TARGET_SIMD"
 965   "<su>abal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
 966   [(set_attr "type" "neon_arith_acc<q>")]
 967 )
 968
 969 (define_insn "aarch64_<su>abal2<mode>_insn"
 970   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
 971         (plus:<VDBLW>
 972           (zero_extend:<VDBLW>
 973             (minus:<VHALF>
 974               (USMAX:<VHALF>
 975                 (vec_select:<VHALF>
 976                   (match_operand:VQW 2 "register_operand" "w")
 977                   (match_operand:VQW 4 "vect_par_cnst_hi_half" ""))
 978                 (vec_select:<VHALF>
 979                   (match_operand:VQW 3 "register_operand" "w")
 980                   (match_dup 4)))
 981               (<max_opp>:<VHALF>
 982                 (vec_select:<VHALF>
 983                   (match_dup 2)
 984                   (match_dup 4))
 985                 (vec_select:<VHALF>
 986                   (match_dup 3)
 987                   (match_dup 4)))))
 988           (match_operand:<VDBLW> 1 "register_operand" "0")))]
 989   "TARGET_SIMD"
 990   "<su>abal2\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
 991   [(set_attr "type" "neon_arith_acc<q>")]
 992 )
 993
 994 (define_expand "aarch64_<su>abal2<mode>"
 995   [(match_operand:<VDBLW> 0 "register_operand")
 996    (match_operand:<VDBLW> 1 "register_operand")
 997    (USMAX:VQW
 998      (match_operand:VQW 2 "register_operand")
 999      (match_operand:VQW 3 "register_operand"))]
1000   "TARGET_SIMD"
1001   {
1002     rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1003     emit_insn (gen_aarch64_<su>abal2<mode>_insn (operands[0], operands[1],
1004                                                  operands[2], operands[3], hi));
1005     DONE;
1006   }
1007 )
1008
1009 (define_expand "aarch64_<su>adalp<mode>"
1010   [(set (match_operand:<VDBLW> 0 "register_operand")
1011         (plus:<VDBLW>
1012           (plus:<VDBLW>
1013             (vec_select:<VDBLW>
1014               (ANY_EXTEND:<V2XWIDE>
1015                 (match_operand:VDQV_L 2 "register_operand"))
1016               (match_dup 3))
1017             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1018               (match_dup 4)))
1019           (match_operand:<VDBLW> 1 "register_operand")))]
1020  "TARGET_SIMD"
1021  {
1022    int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
1023    operands[3] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
1024    operands[4] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
1025  }
1026 )
1027
1028 (define_insn "*aarch64_<su>adalp<mode><vczle><vczbe>_insn"
1029   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
1030         (plus:<VDBLW>
1031           (plus:<VDBLW>
1032             (vec_select:<VDBLW>
1033               (ANY_EXTEND:<V2XWIDE>
1034                 (match_operand:VDQV_L 2 "register_operand" "w"))
1035               (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half" ""))
1036             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 2))
1037               (match_operand:<V2XWIDE> 4 "vect_par_cnst_even_or_odd_half" "")))
1038         (match_operand:<VDBLW> 1 "register_operand" "0")))]
1039  "TARGET_SIMD
1040   && !rtx_equal_p (operands[3], operands[4])"
1041  "<su>adalp\t%0.<Vwhalf>, %2.<Vtype>"
1042   [(set_attr "type" "neon_reduc_add<q>")]
1043 )
1044
1045 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
1046 ;; inputs in operands 1 and 2.  The sequence also has to perform a widening
1047 ;; reduction of the difference into a V4SI vector and accumulate that into
1048 ;; operand 3 before copying that into the result operand 0.
1049 ;; Perform that with a sequence of:
1050 ;; UABDL2       tmp.8h, op1.16b, op2.16b
1051 ;; UABAL        tmp.8h, op1.8b, op2.8b
1052 ;; UADALP       op3.4s, tmp.8h
1053 ;; MOV          op0, op3 // should be eliminated in later passes.
1054 ;;
1055 ;; For TARGET_DOTPROD we do:
1056 ;; MOV  tmp1.16b, #1 // Can be CSE'd and hoisted out of loops.
1057 ;; UABD tmp2.16b, op1.16b, op2.16b
1058 ;; UDOT op3.4s, tmp2.16b, tmp1.16b
1059 ;; MOV  op0, op3 // RA will tie the operands of UDOT appropriately.
1060 ;;
1061 ;; The signed version just uses the signed variants of the above instructions
1062 ;; but for TARGET_DOTPROD still emits a UDOT as the absolute difference is
1063 ;; unsigned.
1064
1065 (define_expand "<su>sadv16qi"
1066   [(use (match_operand:V4SI 0 "register_operand"))
1067    (USMAX:V16QI (match_operand:V16QI 1 "register_operand")
1068                 (match_operand:V16QI 2 "register_operand"))
1069    (use (match_operand:V4SI 3 "register_operand"))]
1070   "TARGET_SIMD"
1071   {
1072     if (TARGET_DOTPROD)
1073       {
1074         rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
1075         rtx abd = gen_reg_rtx (V16QImode);
1076         emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
1077         emit_insn (gen_udot_prodv4siv16qi (operands[0], abd, ones,
1078                                            operands[3]));
1079         DONE;
1080       }
1081     rtx reduc = gen_reg_rtx (V8HImode);
1082     emit_insn (gen_aarch64_<su>abdl2v16qi (reduc, operands[1],
1083                                             operands[2]));
1084     emit_insn (gen_aarch64_<su>abalv8qi (reduc, reduc,
1085                                          gen_lowpart (V8QImode, operands[1]),
1086                                          gen_lowpart (V8QImode,
1087                                                       operands[2])));
1088     emit_insn (gen_aarch64_<su>adalpv8hi (operands[3], operands[3], reduc));
1089     emit_move_insn (operands[0], operands[3]);
1090     DONE;
1091   }
1092 )
1093
1094 (define_insn "aarch64_<su>aba<mode><vczle><vczbe>"
1095   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1096         (plus:VDQ_BHSI (minus:VDQ_BHSI
1097                          (USMAX:VDQ_BHSI
1098                            (match_operand:VDQ_BHSI 2 "register_operand" "w")
1099                            (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1100                          (<max_opp>:VDQ_BHSI
1101                            (match_dup 2)
1102                            (match_dup 3)))
1103                        (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1104   "TARGET_SIMD"
1105   "<su>aba\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1106   [(set_attr "type" "neon_arith_acc<q>")]
1107 )
1108
1109 (define_insn "fabd<mode>3<vczle><vczbe>"
1110   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
1111         (abs:VHSDF_HSDF
1112           (minus:VHSDF_HSDF
1113             (match_operand:VHSDF_HSDF 1 "register_operand" "w")
1114             (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
1115   "TARGET_SIMD"
1116   "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
1117   [(set_attr "type" "neon_fp_abd_<stype><q>")]
1118 )
1119
1120 ;; For AND (vector, register) and BIC (vector, immediate)
1121 (define_insn "and<mode>3<vczle><vczbe>"
1122   [(set (match_operand:VDQ_I 0 "register_operand")
1123         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1124                    (match_operand:VDQ_I 2 "aarch64_reg_or_and_imm")))]
1125   "TARGET_SIMD"
1126   {@ [ cons: =0 , 1 , 2   ]
1127      [ w        , w , w   ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1128      [ w        , 0 , Db  ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
1129   }
1130   [(set_attr "type" "neon_logic<q>")]
1131 )
1132
1133 ;; For ORR (vector, register) and ORR (vector, immediate)
1134 (define_insn "ior<mode>3<vczle><vczbe>"
1135   [(set (match_operand:VDQ_I 0 "register_operand")
1136         (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1137                    (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm")))]
1138   "TARGET_SIMD"
1139   {@ [ cons: =0 , 1 , 2  ]
1140      [ w        , w , w  ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1141      [ w        , 0 , Do ] << aarch64_output_simd_orr_imm (operands[2], <bitsize>);
1142   }
1143   [(set_attr "type" "neon_logic<q>")]
1144 )
1145
1146 ;; For EOR (vector, register) and SVE EOR (vector, immediate)
1147 (define_insn "xor<mode>3<vczle><vczbe>"
1148   [(set (match_operand:VDQ_I 0 "register_operand")
1149         (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1150                    (match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))]
1151   "TARGET_SIMD"
1152   {@ [ cons: =0 , 1 , 2  ]
1153      [ w        , w , w  ] eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
1154      [ w        , 0 , De ] << aarch64_output_simd_xor_imm (operands[2], <bitsize>);
1155   }
1156   [(set_attr "type" "neon_logic<q>")]
1157 )
1158
1159 (define_insn "one_cmpl<mode>2<vczle><vczbe>"
1160   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1161         (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
1162   "TARGET_SIMD"
1163   "not\t%0.<Vbtype>, %1.<Vbtype>"
1164   [(set_attr "type" "neon_logic<q>")]
1165 )
1166
1167 (define_insn "@aarch64_simd_vec_set<mode>"
1168   [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
1169         (vec_merge:VALL_F16
1170             (vec_duplicate:VALL_F16
1171                 (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
1172             (match_operand:VALL_F16 3 "register_operand" "0,0,0")
1173             (match_operand:SI 2 "immediate_operand" "i,i,i")))]
1174   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1175   {
1176    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1177    operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1178    switch (which_alternative)
1179      {
1180      case 0:
1181         return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1182      case 1:
1183         return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
1184      case 2:
1185         return "ld1\\t{%0.<Vetype>}[%p2], %1";
1186      default:
1187         gcc_unreachable ();
1188      }
1189   }
1190   [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
1191 )
1192
1193 (define_insn "aarch64_simd_vec_set_zero<mode>"
1194   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1195         (vec_merge:VALL_F16
1196             (match_operand:VALL_F16 1 "aarch64_simd_imm_zero" "")
1197             (match_operand:VALL_F16 3 "register_operand" "0")
1198             (match_operand:SI 2 "immediate_operand" "i")))]
1199   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1200   {
1201     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1202     operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1203     return "ins\\t%0.<Vetype>[%p2], <vwcore>zr";
1204   }
1205 )
1206
1207 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
1208   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
1209         (vec_merge:VALL_F16
1210             (vec_duplicate:VALL_F16
1211               (vec_select:<VEL>
1212                 (match_operand:VALL_F16 3 "register_operand" "w")
1213                 (parallel
1214                   [(match_operand:SI 4 "immediate_operand" "i")])))
1215             (match_operand:VALL_F16 1 "register_operand" "0")
1216             (match_operand:SI 2 "immediate_operand" "i")))]
1217   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1218   {
1219     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1220     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1221     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
1222
1223     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1224   }
1225   [(set_attr "type" "neon_ins<q>")]
1226 )
1227
1228 (define_insn "@aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
1229   [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
1230         (vec_merge:VALL_F16_NO_V2Q
1231             (vec_duplicate:VALL_F16_NO_V2Q
1232               (vec_select:<VEL>
1233                 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
1234                 (parallel
1235                   [(match_operand:SI 4 "immediate_operand" "i")])))
1236             (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
1237             (match_operand:SI 2 "immediate_operand" "i")))]
1238   "TARGET_SIMD && exact_log2 (INTVAL (operands[2])) >= 0"
1239   {
1240     int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
1241     operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
1242     operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
1243                                            INTVAL (operands[4]));
1244
1245     return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
1246   }
1247   [(set_attr "type" "neon_ins<q>")]
1248 )
1249
1250 (define_expand "signbit<mode>2"
1251   [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
1252    (use (match_operand:VDQSF 1 "register_operand"))]
1253   "TARGET_SIMD"
1254 {
1255   int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
1256   rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
1257                                                         shift_amount);
1258   operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
1259
1260   emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
1261                                                  shift_vector));
1262   DONE;
1263 })
1264
1265 (define_insn "aarch64_simd_lshr<mode><vczle><vczbe>"
1266  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1267        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1268                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm" "Dr")))]
1269  "TARGET_SIMD"
1270  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
1271   [(set_attr "type" "neon_shift_imm<q>")]
1272 )
1273
1274 (define_insn "aarch64_simd_ashr<mode><vczle><vczbe>"
1275  [(set (match_operand:VDQ_I 0 "register_operand")
1276        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1277                      (match_operand:VDQ_I  2 "aarch64_simd_rshift_imm")))]
1278  "TARGET_SIMD"
1279  {@ [ cons: =0 , 1 , 2  ; attrs: type        ]
1280     [ w        , w , D1 ; neon_compare<q>    ] cmlt\t%0.<Vtype>, %1.<Vtype>, #0
1281     [ w        , w , Dr ; neon_shift_imm<q>  ] sshr\t%0.<Vtype>, %1.<Vtype>, %2
1282   }
1283 )
1284
1285 (define_insn "aarch64_<sra_op>sra_n<mode>_insn"
1286  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1287         (plus:VDQ_I
1288            (SHIFTRT:VDQ_I
1289                 (match_operand:VDQ_I 2 "register_operand" "w")
1290                 (match_operand:VDQ_I 3 "aarch64_simd_rshift_imm"))
1291            (match_operand:VDQ_I 1 "register_operand" "0")))]
1292   "TARGET_SIMD"
1293   "<sra_op>sra\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1294   [(set_attr "type" "neon_shift_acc<q>")]
1295 )
1296
1297 ;; After all the combinations and propagations of ROTATE have been
1298 ;; attempted split any remaining vector rotates into SHL + USRA sequences.
1299 ;; Don't match this after reload as the various possible sequence for this
1300 ;; require temporary registers.
1301 (define_insn_and_split "*aarch64_simd_rotate_imm<mode>"
1302   [(set (match_operand:VDQ_I 0 "register_operand" "=&w")
1303         (rotate:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1304                       (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm")))]
1305   "TARGET_SIMD && can_create_pseudo_p ()"
1306   "#"
1307   "&& 1"
1308   [(set (match_dup 3)
1309         (ashift:VDQ_I (match_dup 1)
1310                       (match_dup 2)))
1311    (set (match_dup 0)
1312         (plus:VDQ_I
1313           (lshiftrt:VDQ_I
1314             (match_dup 1)
1315             (match_dup 4))
1316           (match_dup 3)))]
1317   {
1318     if (aarch64_emit_opt_vec_rotate (operands[0], operands[1], operands[2]))
1319       DONE;
1320
1321     operands[3] = gen_reg_rtx (<MODE>mode);
1322     rtx shft_amnt = unwrap_const_vec_duplicate (operands[2]);
1323     int bitwidth = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1324     operands[4]
1325       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1326                                            bitwidth - INTVAL (shft_amnt));
1327   }
1328   [(set_attr "length" "8")]
1329 )
1330
1331 (define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
1332  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
1333         (plus:VSDQ_I_DI
1334           (truncate:VSDQ_I_DI
1335             (SHIFTRT:<V2XWIDE>
1336               (plus:<V2XWIDE>
1337                 (<SHIFTEXTEND>:<V2XWIDE>
1338                   (match_operand:VSDQ_I_DI 2 "register_operand" "w"))
1339                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
1340               (match_operand:VSDQ_I_DI 3 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>")))
1341           (match_operand:VSDQ_I_DI 1 "register_operand" "0")))]
1342   "TARGET_SIMD
1343    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
1344   "<sra_op>rsra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
1345   [(set_attr "type" "neon_shift_acc<q>")]
1346 )
1347
1348 (define_expand "aarch64_<sra_op>sra_n<mode>"
1349  [(set (match_operand:VDQ_I 0 "register_operand")
1350         (plus:VDQ_I
1351            (SHIFTRT:VDQ_I
1352                 (match_operand:VDQ_I 2 "register_operand")
1353                 (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))
1354            (match_operand:VDQ_I 1 "register_operand")))]
1355   "TARGET_SIMD"
1356   {
1357     operands[3]
1358       = aarch64_simd_gen_const_vector_dup (<MODE>mode, UINTVAL (operands[3]));
1359   }
1360 )
1361
1362 (define_expand "aarch64_<sra_op>rsra_n<mode>"
1363   [(match_operand:VSDQ_I_DI 0 "register_operand")
1364    (match_operand:VSDQ_I_DI 1 "register_operand")
1365    (SHIFTRT:VSDQ_I_DI
1366      (match_operand:VSDQ_I_DI 2 "register_operand")
1367      (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
1368   "TARGET_SIMD"
1369   {
1370     /* Use this expander to create the rounding constant vector, which is
1371        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
1372        RTL is generated when handling the DImode expanders.  */
1373     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
1374     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
1375     rtx shft = gen_int_mode (INTVAL (operands[3]), DImode);
1376     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
1377     if (VECTOR_MODE_P (<MODE>mode))
1378       {
1379         shft = gen_const_vec_duplicate (<MODE>mode, shft);
1380         rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
1381       }
1382
1383     emit_insn (gen_aarch64_<sra_op>rsra_n<mode>_insn (operands[0], operands[1],
1384                                                       operands[2], shft, rnd));
1385     DONE;
1386   }
1387 )
1388
1389 (define_insn "aarch64_simd_imm_shl<mode><vczle><vczbe>"
1390  [(set (match_operand:VDQ_I 0 "register_operand")
1391        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand")
1392                    (match_operand:VDQ_I  2 "aarch64_simd_lshift_imm")))]
1393  "TARGET_SIMD"
1394   {@ [ cons: =0, 1,  2   ; attrs: type       ]
1395      [ w       , w,  vs1 ; neon_add<q>       ] add\t%0.<Vtype>, %1.<Vtype>, %1.<Vtype>
1396      [ w       , w,  Dl  ; neon_shift_imm<q> ] shl\t%0.<Vtype>, %1.<Vtype>, %2
1397   }
1398 )
1399
1400 (define_insn "aarch64_simd_reg_sshl<mode><vczle><vczbe>"
1401  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1402        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
1403                    (match_operand:VDQ_I 2 "register_operand" "w")))]
1404  "TARGET_SIMD"
1405  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1406   [(set_attr "type" "neon_shift_reg<q>")]
1407 )
1408
1409 (define_insn "aarch64_simd_reg_shl<mode>_unsigned<vczle><vczbe>"
1410  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1411        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1412                     (match_operand:VDQ_I 2 "register_operand" "w")]
1413                    UNSPEC_ASHIFT_UNSIGNED))]
1414  "TARGET_SIMD"
1415  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1416   [(set_attr "type" "neon_shift_reg<q>")]
1417 )
1418
1419 (define_insn "aarch64_simd_reg_shl<mode>_signed<vczle><vczbe>"
1420  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
1421        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
1422                     (match_operand:VDQ_I 2 "register_operand" "w")]
1423                    UNSPEC_ASHIFT_SIGNED))]
1424  "TARGET_SIMD"
1425  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1426   [(set_attr "type" "neon_shift_reg<q>")]
1427 )
1428
1429 (define_expand "ashl<mode>3"
1430   [(match_operand:VDQ_I 0 "register_operand")
1431    (match_operand:VDQ_I 1 "register_operand")
1432    (match_operand:SI  2 "general_operand")]
1433  "TARGET_SIMD"
1434 {
1435   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1436   int shift_amount;
1437
1438   if (CONST_INT_P (operands[2]))
1439     {
1440       shift_amount = INTVAL (operands[2]);
1441       if (shift_amount >= 0 && shift_amount < bit_width)
1442         {
1443           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1444                                                        shift_amount);
1445           emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
1446                                                      operands[1],
1447                                                      tmp));
1448           DONE;
1449         }
1450     }
1451
1452   operands[2] = force_reg (SImode, operands[2]);
1453
1454   rtx tmp = gen_reg_rtx (<MODE>mode);
1455   emit_insn (gen_aarch64_simd_dup<mode> (tmp, convert_to_mode (<VEL>mode,
1456                                                                operands[2],
1457                                                                0)));
1458   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1], tmp));
1459   DONE;
1460 })
1461
1462 (define_expand "lshr<mode>3"
1463   [(match_operand:VDQ_I 0 "register_operand")
1464    (match_operand:VDQ_I 1 "register_operand")
1465    (match_operand:SI  2 "general_operand")]
1466  "TARGET_SIMD"
1467 {
1468   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1469   int shift_amount;
1470
1471   if (CONST_INT_P (operands[2]))
1472     {
1473       shift_amount = INTVAL (operands[2]);
1474       if (shift_amount > 0 && shift_amount <= bit_width)
1475         {
1476           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1477                                                        shift_amount);
1478           emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
1479                                                   operands[1],
1480                                                   tmp));
1481           DONE;
1482         }
1483     }
1484
1485   operands[2] = force_reg (SImode, operands[2]);
1486
1487   rtx tmp = gen_reg_rtx (SImode);
1488   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1489   emit_insn (gen_negsi2 (tmp, operands[2]));
1490   emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1491                                          convert_to_mode (<VEL>mode, tmp, 0)));
1492   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1493                                                       tmp1));
1494   DONE;
1495 })
1496
1497 (define_expand "ashr<mode>3"
1498   [(match_operand:VDQ_I 0 "register_operand")
1499    (match_operand:VDQ_I 1 "register_operand")
1500    (match_operand:SI  2 "general_operand")]
1501  "TARGET_SIMD"
1502 {
1503   int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
1504   int shift_amount;
1505
1506   if (CONST_INT_P (operands[2]))
1507     {
1508       shift_amount = INTVAL (operands[2]);
1509       if (shift_amount > 0 && shift_amount <= bit_width)
1510         {
1511           rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
1512                                                        shift_amount);
1513           emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
1514                                                   operands[1],
1515                                                   tmp));
1516           DONE;
1517         }
1518     }
1519
1520   operands[2] = force_reg (SImode, operands[2]);
1521
1522   rtx tmp = gen_reg_rtx (SImode);
1523   rtx tmp1 = gen_reg_rtx (<MODE>mode);
1524   emit_insn (gen_negsi2 (tmp, operands[2]));
1525   emit_insn (gen_aarch64_simd_dup<mode> (tmp1, convert_to_mode (<VEL>mode,
1526                                                                 tmp, 0)));
1527   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1528                                                     tmp1));
1529   DONE;
1530 })
1531
1532 (define_expand "vashl<mode>3"
1533  [(match_operand:VDQ_I 0 "register_operand")
1534   (match_operand:VDQ_I 1 "register_operand")
1535   (match_operand:VDQ_I 2 "register_operand")]
1536  "TARGET_SIMD"
1537 {
1538   emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1539                                               operands[2]));
1540   DONE;
1541 })
1542
1543 (define_expand "vashr<mode>3"
1544  [(match_operand:VDQ_I 0 "register_operand")
1545   (match_operand:VDQ_I 1 "register_operand")
1546   (match_operand:VDQ_I 2 "register_operand")]
1547  "TARGET_SIMD"
1548 {
1549   rtx neg = gen_reg_rtx (<MODE>mode);
1550   emit (gen_neg<mode>2 (neg, operands[2]));
1551   emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1552                                                     neg));
1553   DONE;
1554 })
1555
1556 ;; DI vector shift
1557 (define_expand "aarch64_ashr_simddi"
1558   [(match_operand:DI 0 "register_operand")
1559    (match_operand:DI 1 "register_operand")
1560    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1561   "TARGET_SIMD"
1562   {
1563     /* An arithmetic shift right by 64 fills the result with copies of the sign
1564        bit, just like asr by 63 - however the standard pattern does not handle
1565        a shift by 64.  */
1566     if (INTVAL (operands[2]) == 64)
1567       operands[2] = GEN_INT (63);
1568     emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1569     DONE;
1570   }
1571 )
1572
1573 (define_expand "vlshr<mode>3"
1574  [(match_operand:VDQ_I 0 "register_operand")
1575   (match_operand:VDQ_I 1 "register_operand")
1576   (match_operand:VDQ_I 2 "register_operand")]
1577  "TARGET_SIMD"
1578 {
1579   rtx neg = gen_reg_rtx (<MODE>mode);
1580   emit (gen_neg<mode>2 (neg, operands[2]));
1581   emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1582                                                       neg));
1583   DONE;
1584 })
1585
1586 (define_expand "aarch64_lshr_simddi"
1587   [(match_operand:DI 0 "register_operand")
1588    (match_operand:DI 1 "register_operand")
1589    (match_operand:SI 2 "aarch64_shift_imm64_di")]
1590   "TARGET_SIMD"
1591   {
1592     if (INTVAL (operands[2]) == 64)
1593       emit_move_insn (operands[0], const0_rtx);
1594     else
1595       emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1596     DONE;
1597   }
1598 )
1599
1600 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1601 (define_insn "vec_shr_<mode><vczle><vczbe>"
1602   [(set (match_operand:VD 0 "register_operand" "=w")
1603         (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1604                     (match_operand:SI 2 "immediate_operand" "i")]
1605                    UNSPEC_VEC_SHR))]
1606   "TARGET_SIMD"
1607   {
1608     if (BYTES_BIG_ENDIAN)
1609       return "shl %d0, %d1, %2";
1610     else
1611       return "ushr %d0, %d1, %2";
1612   }
1613   [(set_attr "type" "neon_shift_imm")]
1614 )
1615
1616 (define_expand "vec_set<mode>"
1617   [(match_operand:VALL_F16 0 "register_operand")
1618    (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
1619    (match_operand:SI 2 "immediate_operand")]
1620   "TARGET_SIMD"
1621   {
1622     HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1623     emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1624                                           GEN_INT (elem), operands[0]));
1625     DONE;
1626   }
1627 )
1628
1629
1630 (define_insn "aarch64_mla<mode><vczle><vczbe>"
1631  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1632        (plus:VDQ_BHSI (mult:VDQ_BHSI
1633                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
1634                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1635                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1636  "TARGET_SIMD"
1637  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1638   [(set_attr "type" "neon_mla_<Vetype><q>")]
1639 )
1640
1641 (define_insn "*aarch64_mla_elt<mode><vczle><vczbe>"
1642  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1643        (plus:VDQHS
1644          (mult:VDQHS
1645            (vec_duplicate:VDQHS
1646               (vec_select:<VEL>
1647                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1648                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1649            (match_operand:VDQHS 3 "register_operand" "w"))
1650          (match_operand:VDQHS 4 "register_operand" "0")))]
1651  "TARGET_SIMD"
1652   {
1653     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1654     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1655   }
1656   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1657 )
1658
1659 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode><vczle><vczbe>"
1660  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1661        (plus:VDQHS
1662          (mult:VDQHS
1663            (vec_duplicate:VDQHS
1664               (vec_select:<VEL>
1665                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1666                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1667            (match_operand:VDQHS 3 "register_operand" "w"))
1668          (match_operand:VDQHS 4 "register_operand" "0")))]
1669  "TARGET_SIMD"
1670   {
1671     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1672     return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1673   }
1674   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1675 )
1676
1677 (define_insn "aarch64_mla_n<mode><vczle><vczbe>"
1678  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1679         (plus:VDQHS
1680           (mult:VDQHS
1681             (vec_duplicate:VDQHS
1682               (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1683             (match_operand:VDQHS 2 "register_operand" "w"))
1684           (match_operand:VDQHS 1 "register_operand" "0")))]
1685  "TARGET_SIMD"
1686  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1687   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1688 )
1689
1690 (define_insn "aarch64_mls<mode><vczle><vczbe>"
1691  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1692        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1693                    (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1694                               (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1695  "TARGET_SIMD"
1696  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1697   [(set_attr "type" "neon_mla_<Vetype><q>")]
1698 )
1699
1700 (define_insn "*aarch64_mls_elt<mode><vczle><vczbe>"
1701  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1702        (minus:VDQHS
1703          (match_operand:VDQHS 4 "register_operand" "0")
1704          (mult:VDQHS
1705            (vec_duplicate:VDQHS
1706               (vec_select:<VEL>
1707                 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1708                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1709            (match_operand:VDQHS 3 "register_operand" "w"))))]
1710  "TARGET_SIMD"
1711   {
1712     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1713     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1714   }
1715   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1716 )
1717
1718 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode><vczle><vczbe>"
1719  [(set (match_operand:VDQHS 0 "register_operand" "=w")
1720        (minus:VDQHS
1721          (match_operand:VDQHS 4 "register_operand" "0")
1722          (mult:VDQHS
1723            (vec_duplicate:VDQHS
1724               (vec_select:<VEL>
1725                 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1726                   (parallel [(match_operand:SI 2 "immediate_operand")])))
1727            (match_operand:VDQHS 3 "register_operand" "w"))))]
1728  "TARGET_SIMD"
1729   {
1730     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1731     return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
1732   }
1733   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1734 )
1735
1736 (define_insn "aarch64_mls_n<mode><vczle><vczbe>"
1737   [(set (match_operand:VDQHS 0 "register_operand" "=w")
1738         (minus:VDQHS
1739           (match_operand:VDQHS 1 "register_operand" "0")
1740           (mult:VDQHS
1741             (vec_duplicate:VDQHS
1742               (match_operand:<VEL> 3 "register_operand" "<h_con>"))
1743             (match_operand:VDQHS 2 "register_operand" "w"))))]
1744   "TARGET_SIMD"
1745   "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[0]"
1746   [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1747 )
1748
1749 ;; Max/Min operations.
1750 (define_insn "<su><maxmin><mode>3<vczle><vczbe>"
1751  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1752        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1753                     (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1754  "TARGET_SIMD"
1755  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1756   [(set_attr "type" "neon_minmax<q>")]
1757 )
1758
1759 (define_expand "<su><maxmin>v2di3"
1760  [(set (match_operand:V2DI 0 "register_operand")
1761        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand")
1762                     (match_operand:V2DI 2 "register_operand")))]
1763  "TARGET_SIMD"
1764 {
1765   enum rtx_code cmp_operator;
1766   rtx cmp_fmt;
1767
1768   switch (<CODE>)
1769     {
1770     case UMIN:
1771       cmp_operator = LTU;
1772       break;
1773     case SMIN:
1774       cmp_operator = LT;
1775       break;
1776     case UMAX:
1777       cmp_operator = GTU;
1778       break;
1779     case SMAX:
1780       cmp_operator = GT;
1781       break;
1782     default:
1783       gcc_unreachable ();
1784     }
1785
1786   rtx mask = gen_reg_rtx (V2DImode);
1787   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1788   emit_insn (gen_vec_cmpv2div2di (mask, cmp_fmt, operands[1], operands[2]));
1789   emit_insn (gen_vcond_mask_v2div2di (operands[0], operands[1],
1790                                       operands[2], mask));
1791   DONE;
1792 })
1793
1794 ;; Pairwise Integer Max/Min operations.
1795 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1796  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1797        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1798                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1799                         MAXMINV))]
1800  "TARGET_SIMD"
1801  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1802   [(set_attr "type" "neon_minmax<q>")]
1803 )
1804
1805 ;; Pairwise FP Max/Min operations.
1806 (define_insn "aarch64_<optab>p<mode><vczle><vczbe>"
1807  [(set (match_operand:VHSDF 0 "register_operand" "=w")
1808        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1809                       (match_operand:VHSDF 2 "register_operand" "w")]
1810                       FMAXMINV))]
1811  "TARGET_SIMD"
1812  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1813   [(set_attr "type" "neon_minmax<q>")]
1814 )
1815
1816 ;; vec_concat gives a new vector with the low elements from operand 1, and
1817 ;; the high elements from operand 2.  That is to say, given op1 = { a, b }
1818 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1819 ;; What that means, is that the RTL descriptions of the below patterns
1820 ;; need to change depending on endianness.
1821
1822 ;; Narrowing operations.
1823
1824 (define_insn "aarch64_xtn2<mode>_insn_le"
1825   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1826         (vec_concat:<VNARROWQ2>
1827           (match_operand:<VNARROWQ> 1 "register_operand" "0")
1828           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1829   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1830   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1831   [(set_attr "type" "neon_move_narrow_q")]
1832 )
1833
1834 (define_insn "aarch64_xtn2<mode>_insn_be"
1835   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1836         (vec_concat:<VNARROWQ2>
1837           (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
1838           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
1839   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1840   "xtn2\t%0.<V2ntype>, %2.<Vtype>"
1841   [(set_attr "type" "neon_move_narrow_q")]
1842 )
1843
1844 (define_expand "aarch64_xtn2<mode>"
1845   [(match_operand:<VNARROWQ2> 0 "register_operand")
1846    (match_operand:<VNARROWQ> 1 "register_operand")
1847    (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))]
1848   "TARGET_SIMD"
1849   {
1850     if (BYTES_BIG_ENDIAN)
1851       emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
1852                                                  operands[2]));
1853     else
1854       emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
1855                                                  operands[2]));
1856     DONE;
1857   }
1858 )
1859
1860 (define_insn "*aarch64_narrow_trunc<mode>"
1861   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1862         (vec_concat:<VNARROWQ2>
1863           (truncate:<VNARROWQ>
1864             (match_operand:VQN 1 "register_operand" "w"))
1865           (truncate:<VNARROWQ>
1866             (match_operand:VQN 2 "register_operand" "w"))))]
1867   "TARGET_SIMD"
1868 {
1869   if (!BYTES_BIG_ENDIAN)
1870     return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
1871   else
1872     return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
1873 }
1874   [(set_attr "type" "neon_permute<q>")]
1875 )
1876
1877 (define_insn "*aarch64_trunc_concat<mode>"
1878   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1879         (truncate:<VNARROWQ>
1880           (vec_concat:VQN
1881             (match_operand:<VHALF> 1 "register_operand" "w")
1882             (match_operand:<VHALF> 2 "register_operand" "w"))))]
1883   "TARGET_SIMD"
1884 {
1885   if (!BYTES_BIG_ENDIAN)
1886     return "uzp1\\t%0.<Vntype>, %1.<Vntype>, %2.<Vntype>";
1887   else
1888     return "uzp1\\t%0.<Vntype>, %2.<Vntype>, %1.<Vntype>";
1889 }
1890   [(set_attr "type" "neon_permute<q>")]
1891 )
1892
1893 ;; Packing doubles.
1894
1895 (define_expand "vec_pack_trunc_<mode>"
1896  [(match_operand:<VNARROWD> 0 "register_operand")
1897   (match_operand:VDN 1 "general_operand")
1898   (match_operand:VDN 2 "general_operand")]
1899  "TARGET_SIMD"
1900 {
1901   rtx tempreg = gen_reg_rtx (<VDBL>mode);
1902   emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2]));
1903   emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
1904   DONE;
1905 })
1906
1907 ;; Packing quads.
1908
1909 (define_expand "vec_pack_trunc_<mode>"
1910  [(set (match_operand:<VNARROWQ2> 0 "register_operand")
1911        (vec_concat:<VNARROWQ2>
1912          (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
1913          (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
1914  "TARGET_SIMD"
1915  {
1916    rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
1917    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1918    int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1919
1920    emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
1921
1922    if (BYTES_BIG_ENDIAN)
1923      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
1924                                                 operands[hi]));
1925    else
1926      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
1927                                                 operands[hi]));
1928    DONE;
1929  }
1930 )
1931
1932 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_le"
1933   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1934         (vec_concat:<VNARROWQ2>
1935           (truncate:<VNARROWQ>
1936             (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1937               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1938           (truncate:<VNARROWQ>
1939             (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1940               (match_dup 2)))))]
1941   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1942   "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1943   [(set_attr "type" "neon_permute<q>")]
1944 )
1945
1946 (define_insn "*aarch64_<srn_op>topbits_shuffle<mode>_be"
1947   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
1948         (vec_concat:<VNARROWQ2>
1949           (truncate:<VNARROWQ>
1950             (SHIFTRT:VQN (match_operand:VQN 3 "register_operand" "w")
1951               (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_exact_top")))
1952           (truncate:<VNARROWQ>
1953             (SHIFTRT:VQN (match_operand:VQN 1 "register_operand" "w")
1954               (match_dup 2)))))]
1955   "TARGET_SIMD && BYTES_BIG_ENDIAN"
1956   "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>"
1957   [(set_attr "type" "neon_permute<q>")]
1958 )
1959
1960 ;; Widening operations.
1961
1962 (define_insn_and_split "aarch64_simd_vec_unpack<su>_hi_<mode>"
1963   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1964         (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1965                                (match_operand:VQW 1 "register_operand" "w")
1966                                (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1967                             )))]
1968   "TARGET_SIMD"
1969   "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1970   "&& <CODE> == ZERO_EXTEND
1971    && aarch64_split_simd_shift_p (insn)"
1972   [(const_int 0)]
1973   {
1974     /* On many cores, it is cheaper to implement UXTL2 using a ZIP2 with zero,
1975        provided that the cost of the zero can be amortized over several
1976        operations.  We'll later recombine the zero and zip if there are
1977        not sufficient uses of the zero to make the split worthwhile.  */
1978     rtx res = simplify_gen_subreg (<MODE>mode, operands[0], <VWIDE>mode, 0);
1979     rtx zero = aarch64_gen_shareable_zero (<MODE>mode);
1980     emit_insn (gen_aarch64_zip2<mode> (res, operands[1], zero));
1981     DONE;
1982   }
1983   [(set_attr "type" "neon_shift_imm_long")]
1984 )
1985
1986 (define_expand "vec_unpack<su>_hi_<mode>"
1987   [(match_operand:<VWIDE> 0 "register_operand")
1988    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1989   "TARGET_SIMD"
1990   {
1991     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1992     emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1993                                                           operands[1], p));
1994     DONE;
1995   }
1996 )
1997
1998 (define_expand "vec_unpack<su>_lo_<mode>"
1999   [(set (match_operand:<VWIDE> 0 "register_operand")
2000         (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand")))]
2001   "TARGET_SIMD"
2002   {
2003     operands[1] = lowpart_subreg (<VHALF>mode, operands[1], <MODE>mode);
2004   }
2005 )
2006
2007 ;; Widening arithmetic.
2008
2009 (define_insn "*aarch64_<su>mlal_lo<mode>"
2010   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2011         (plus:<VWIDE>
2012           (mult:<VWIDE>
2013               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2014                  (match_operand:VQW 2 "register_operand" "w")
2015                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2016               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2017                  (match_operand:VQW 4 "register_operand" "w")
2018                  (match_dup 3))))
2019           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2020   "TARGET_SIMD"
2021   "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2022   [(set_attr "type" "neon_mla_<Vetype>_long")]
2023 )
2024
2025 (define_insn "aarch64_<su>mlal_hi<mode>_insn"
2026   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2027         (plus:<VWIDE>
2028           (mult:<VWIDE>
2029               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2030                  (match_operand:VQW 2 "register_operand" "w")
2031                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2032               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2033                  (match_operand:VQW 4 "register_operand" "w")
2034                  (match_dup 3))))
2035           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2036   "TARGET_SIMD"
2037   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2038   [(set_attr "type" "neon_mla_<Vetype>_long")]
2039 )
2040
2041 (define_expand "aarch64_<su>mlal_hi<mode>"
2042   [(match_operand:<VWIDE> 0 "register_operand")
2043    (match_operand:<VWIDE> 1 "register_operand")
2044    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2045    (match_operand:VQW 3 "register_operand")]
2046   "TARGET_SIMD"
2047 {
2048   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2049   emit_insn (gen_aarch64_<su>mlal_hi<mode>_insn (operands[0], operands[1],
2050                                                  operands[2], p, operands[3]));
2051   DONE;
2052 }
2053 )
2054
2055 (define_insn "aarch64_<su>mlal_hi_n<mode>_insn"
2056   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2057         (plus:<VWIDE>
2058           (mult:<VWIDE>
2059             (ANY_EXTEND:<VWIDE>
2060               (vec_select:<VHALF>
2061                 (match_operand:VQ_HSI 2 "register_operand" "w")
2062                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2063             (vec_duplicate:<VWIDE>
2064               (ANY_EXTEND:<VWIDE_S>
2065                 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))
2066           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2067   "TARGET_SIMD"
2068   "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2069   [(set_attr "type" "neon_mla_<Vetype>_long")]
2070 )
2071
2072 (define_expand "aarch64_<su>mlal_hi_n<mode>"
2073   [(match_operand:<VWIDE> 0 "register_operand")
2074    (match_operand:<VWIDE> 1 "register_operand")
2075    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2076    (match_operand:<VEL> 3 "register_operand")]
2077   "TARGET_SIMD"
2078 {
2079   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2080   emit_insn (gen_aarch64_<su>mlal_hi_n<mode>_insn (operands[0],
2081              operands[1], operands[2], p, operands[3]));
2082   DONE;
2083 }
2084 )
2085
2086 (define_insn "*aarch64_<su>mlsl_lo<mode>"
2087   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2088         (minus:<VWIDE>
2089           (match_operand:<VWIDE> 1 "register_operand" "0")
2090           (mult:<VWIDE>
2091               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2092                  (match_operand:VQW 2 "register_operand" "w")
2093                  (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2094               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2095                  (match_operand:VQW 4 "register_operand" "w")
2096                  (match_dup 3))))))]
2097   "TARGET_SIMD"
2098   "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
2099   [(set_attr "type" "neon_mla_<Vetype>_long")]
2100 )
2101
2102 (define_insn "aarch64_<su>mlsl_hi<mode>_insn"
2103   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2104         (minus:<VWIDE>
2105           (match_operand:<VWIDE> 1 "register_operand" "0")
2106           (mult:<VWIDE>
2107               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2108                  (match_operand:VQW 2 "register_operand" "w")
2109                  (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2110               (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2111                  (match_operand:VQW 4 "register_operand" "w")
2112                  (match_dup 3))))))]
2113   "TARGET_SIMD"
2114   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
2115   [(set_attr "type" "neon_mla_<Vetype>_long")]
2116 )
2117
2118 (define_expand "aarch64_<su>mlsl_hi<mode>"
2119   [(match_operand:<VWIDE> 0 "register_operand")
2120    (match_operand:<VWIDE> 1 "register_operand")
2121    (ANY_EXTEND:<VWIDE>(match_operand:VQW 2 "register_operand"))
2122    (match_operand:VQW 3 "register_operand")]
2123   "TARGET_SIMD"
2124 {
2125   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2126   emit_insn (gen_aarch64_<su>mlsl_hi<mode>_insn (operands[0], operands[1],
2127                                                  operands[2], p, operands[3]));
2128   DONE;
2129 }
2130 )
2131
2132 (define_insn "aarch64_<su>mlsl_hi_n<mode>_insn"
2133   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2134         (minus:<VWIDE>
2135           (match_operand:<VWIDE> 1 "register_operand" "0")
2136           (mult:<VWIDE>
2137             (ANY_EXTEND:<VWIDE>
2138               (vec_select:<VHALF>
2139                 (match_operand:VQ_HSI 2 "register_operand" "w")
2140                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2141             (vec_duplicate:<VWIDE>
2142               (ANY_EXTEND:<VWIDE_S>
2143                 (match_operand:<VEL> 4 "register_operand" "<h_con>"))))))]
2144   "TARGET_SIMD"
2145   "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[0]"
2146   [(set_attr "type" "neon_mla_<Vetype>_long")]
2147 )
2148
2149 (define_expand "aarch64_<su>mlsl_hi_n<mode>"
2150   [(match_operand:<VWIDE> 0 "register_operand")
2151    (match_operand:<VWIDE> 1 "register_operand")
2152    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2153    (match_operand:<VEL> 3 "register_operand")]
2154   "TARGET_SIMD"
2155 {
2156   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2157   emit_insn (gen_aarch64_<su>mlsl_hi_n<mode>_insn (operands[0],
2158              operands[1], operands[2], p, operands[3]));
2159   DONE;
2160 }
2161 )
2162
2163 (define_insn "aarch64_<su>mlal<mode>"
2164   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2165         (plus:<VWIDE>
2166           (mult:<VWIDE>
2167             (ANY_EXTEND:<VWIDE>
2168               (match_operand:VD_BHSI 2 "register_operand" "w"))
2169             (ANY_EXTEND:<VWIDE>
2170               (match_operand:VD_BHSI 3 "register_operand" "w")))
2171           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2172   "TARGET_SIMD"
2173   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2174   [(set_attr "type" "neon_mla_<Vetype>_long")]
2175 )
2176
2177 (define_insn "aarch64_<su>mlal_n<mode>"
2178   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2179         (plus:<VWIDE>
2180           (mult:<VWIDE>
2181             (ANY_EXTEND:<VWIDE>
2182               (match_operand:VD_HSI 2 "register_operand" "w"))
2183             (vec_duplicate:<VWIDE>
2184               (ANY_EXTEND:<VWIDE_S>
2185                 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))
2186           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2187   "TARGET_SIMD"
2188   "<su>mlal\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2189   [(set_attr "type" "neon_mla_<Vetype>_long")]
2190 )
2191
2192 (define_insn "aarch64_<su>mlsl<mode>"
2193   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2194         (minus:<VWIDE>
2195           (match_operand:<VWIDE> 1 "register_operand" "0")
2196           (mult:<VWIDE>
2197             (ANY_EXTEND:<VWIDE>
2198               (match_operand:VD_BHSI 2 "register_operand" "w"))
2199             (ANY_EXTEND:<VWIDE>
2200               (match_operand:VD_BHSI 3 "register_operand" "w")))))]
2201   "TARGET_SIMD"
2202   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
2203   [(set_attr "type" "neon_mla_<Vetype>_long")]
2204 )
2205
2206 (define_insn "aarch64_<su>mlsl_n<mode>"
2207   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2208         (minus:<VWIDE>
2209           (match_operand:<VWIDE> 1 "register_operand" "0")
2210           (mult:<VWIDE>
2211             (ANY_EXTEND:<VWIDE>
2212               (match_operand:VD_HSI 2 "register_operand" "w"))
2213             (vec_duplicate:<VWIDE>
2214               (ANY_EXTEND:<VWIDE_S>
2215                 (match_operand:<VEL> 3 "register_operand" "<h_con>"))))))]
2216   "TARGET_SIMD"
2217   "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vetype>[0]"
2218   [(set_attr "type" "neon_mla_<Vetype>_long")]
2219 )
2220
2221 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
2222  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2223        (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2224                            (match_operand:VQW 1 "register_operand" "w")
2225                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2226                      (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2227                            (match_operand:VQW 2 "register_operand" "w")
2228                            (match_dup 3)))))]
2229   "TARGET_SIMD"
2230   "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2231   [(set_attr "type" "neon_mul_<Vetype>_long")]
2232 )
2233
2234 (define_insn "aarch64_intrinsic_vec_<su>mult_lo_<mode>"
2235   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2236         (mult:<VWIDE> (ANY_EXTEND:<VWIDE>
2237                          (match_operand:VD_BHSI 1 "register_operand" "w"))
2238                       (ANY_EXTEND:<VWIDE>
2239                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2240   "TARGET_SIMD"
2241   "<su>mull\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2242   [(set_attr "type" "neon_mul_<Vetype>_long")]
2243 )
2244
2245 (define_expand "vec_widen_<su>mult_lo_<mode>"
2246   [(match_operand:<VWIDE> 0 "register_operand")
2247    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2248    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2249  "TARGET_SIMD"
2250  {
2251    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2252    emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
2253                                                        operands[1],
2254                                                        operands[2], p));
2255    DONE;
2256  }
2257 )
2258
2259 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
2260  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2261       (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2262                             (match_operand:VQW 1 "register_operand" "w")
2263                             (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2264                     (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2265                             (match_operand:VQW 2 "register_operand" "w")
2266                             (match_dup 3)))))]
2267   "TARGET_SIMD"
2268   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2269   [(set_attr "type" "neon_mul_<Vetype>_long")]
2270 )
2271
2272 (define_expand "vec_widen_<su>mult_hi_<mode>"
2273   [(match_operand:<VWIDE> 0 "register_operand")
2274    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
2275    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
2276  "TARGET_SIMD"
2277  {
2278    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2279    emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
2280                                                        operands[1],
2281                                                        operands[2], p));
2282    DONE;
2283
2284  }
2285 )
2286
2287 ;; vmull_lane_s16 intrinsics
2288 (define_insn "aarch64_vec_<su>mult_lane<Qlane>"
2289   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2290         (mult:<VWIDE>
2291           (ANY_EXTEND:<VWIDE>
2292             (match_operand:<VCOND> 1 "register_operand" "w"))
2293           (vec_duplicate:<VWIDE>
2294             (ANY_EXTEND:<VWIDE_S>
2295               (vec_select:<VEL>
2296                 (match_operand:VDQHS 2 "register_operand" "<vwx>")
2297                 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))))))]
2298   "TARGET_SIMD"
2299   {
2300     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
2301     return "<su>mull\\t%0.<Vwtype>, %1.<Vcondtype>, %2.<Vetype>[%3]";
2302   }
2303   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2304 )
2305
2306 (define_insn "aarch64_<su>mull_hi_lane<mode>_insn"
2307   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2308         (mult:<VWIDE>
2309           (ANY_EXTEND:<VWIDE>
2310             (vec_select:<VHALF>
2311               (match_operand:VQ_HSI 1 "register_operand" "w")
2312               (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2313           (vec_duplicate:<VWIDE>
2314             (ANY_EXTEND:<VWIDE_S>
2315               (vec_select:<VEL>
2316                 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2317                 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2318   "TARGET_SIMD"
2319   {
2320     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
2321     return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2322   }
2323   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2324 )
2325
2326 (define_expand "aarch64_<su>mull_hi_lane<mode>"
2327   [(match_operand:<VWIDE> 0 "register_operand")
2328    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2329    (match_operand:<VCOND> 2 "register_operand")
2330    (match_operand:SI 3 "immediate_operand")]
2331   "TARGET_SIMD"
2332 {
2333   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2334   emit_insn (gen_aarch64_<su>mull_hi_lane<mode>_insn (operands[0],
2335              operands[1], p, operands[2], operands[3]));
2336   DONE;
2337 }
2338 )
2339
2340 (define_insn "aarch64_<su>mull_hi_laneq<mode>_insn"
2341   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2342         (mult:<VWIDE>
2343           (ANY_EXTEND:<VWIDE>
2344             (vec_select:<VHALF>
2345               (match_operand:VQ_HSI 1 "register_operand" "w")
2346               (match_operand:VQ_HSI 2 "vect_par_cnst_hi_half" "")))
2347           (vec_duplicate:<VWIDE>
2348             (ANY_EXTEND:<VWIDE_S>
2349               (vec_select:<VEL>
2350                 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2351                 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))))))]
2352   "TARGET_SIMD"
2353   {
2354     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
2355     return "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %3.<Vetype>[%4]";
2356   }
2357   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2358 )
2359
2360 (define_expand "aarch64_<su>mull_hi_laneq<mode>"
2361   [(match_operand:<VWIDE> 0 "register_operand")
2362    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 1 "register_operand"))
2363    (match_operand:<VCONQ> 2 "register_operand")
2364    (match_operand:SI 3 "immediate_operand")]
2365   "TARGET_SIMD"
2366 {
2367   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2368   emit_insn (gen_aarch64_<su>mull_hi_laneq<mode>_insn (operands[0],
2369              operands[1], p, operands[2], operands[3]));
2370   DONE;
2371 }
2372 )
2373
2374 (define_insn "aarch64_<su>mull_n<mode>"
2375   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2376         (mult:<VWIDE>
2377           (ANY_EXTEND:<VWIDE>
2378             (match_operand:VD_HSI 1 "register_operand" "w"))
2379           (vec_duplicate:<VWIDE>
2380             (ANY_EXTEND:<VWIDE_S>
2381               (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2382   "TARGET_SIMD"
2383   "<su>mull\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2384   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2385 )
2386
2387 (define_insn "aarch64_<su>mull_hi_n<mode>_insn"
2388   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2389         (mult:<VWIDE>
2390           (ANY_EXTEND:<VWIDE>
2391             (vec_select:<VHALF>
2392               (match_operand:VQ_HSI 1 "register_operand" "w")
2393               (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2394           (vec_duplicate:<VWIDE>
2395             (ANY_EXTEND:<VWIDE_S>
2396               (match_operand:<VEL> 2 "register_operand" "<h_con>")))))]
2397   "TARGET_SIMD"
2398   "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vetype>[0]"
2399   [(set_attr "type" "neon_mul_<Vetype>_scalar_long")]
2400 )
2401
2402 (define_expand "aarch64_<su>mull_hi_n<mode>"
2403   [(match_operand:<VWIDE> 0 "register_operand")
2404    (ANY_EXTEND:<VWIDE> (match_operand:VQ_HSI 1 "register_operand"))
2405    (match_operand:<VEL> 2 "register_operand")]
2406  "TARGET_SIMD"
2407  {
2408    rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2409    emit_insn (gen_aarch64_<su>mull_hi_n<mode>_insn (operands[0], operands[1],
2410                                                     operands[2], p));
2411    DONE;
2412  }
2413 )
2414
2415 ;; vmlal_lane_s16 intrinsics
2416 (define_insn "aarch64_vec_<su>mlal_lane<Qlane>"
2417   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2418         (plus:<VWIDE>
2419           (mult:<VWIDE>
2420             (ANY_EXTEND:<VWIDE>
2421               (match_operand:<VCOND> 2 "register_operand" "w"))
2422             (vec_duplicate:<VWIDE>
2423               (ANY_EXTEND:<VWIDE_S>
2424                 (vec_select:<VEL>
2425                   (match_operand:VDQHS 3 "register_operand" "<vwx>")
2426                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))
2427           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2428   "TARGET_SIMD"
2429   {
2430     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2431     return "<su>mlal\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2432   }
2433   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2434 )
2435
2436 (define_insn "aarch64_<su>mlal_hi_lane<mode>_insn"
2437   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2438         (plus:<VWIDE>
2439           (mult:<VWIDE>
2440             (ANY_EXTEND:<VWIDE>
2441               (vec_select:<VHALF>
2442                 (match_operand:VQ_HSI 2 "register_operand" "w")
2443                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2444             (vec_duplicate:<VWIDE>
2445               (ANY_EXTEND:<VWIDE_S>
2446                 (vec_select:<VEL>
2447                   (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2448                   (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2449           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2450   "TARGET_SIMD"
2451   {
2452     operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2453     return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2454   }
2455   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2456 )
2457
2458 (define_expand "aarch64_<su>mlal_hi_lane<mode>"
2459   [(match_operand:<VWIDE> 0 "register_operand")
2460    (match_operand:<VWIDE> 1 "register_operand")
2461    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2462    (match_operand:<VCOND> 3 "register_operand")
2463    (match_operand:SI 4 "immediate_operand")]
2464   "TARGET_SIMD"
2465 {
2466   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2467   emit_insn (gen_aarch64_<su>mlal_hi_lane<mode>_insn (operands[0],
2468              operands[1], operands[2], p, operands[3], operands[4]));
2469   DONE;
2470 }
2471 )
2472
2473 (define_insn "aarch64_<su>mlal_hi_laneq<mode>_insn"
2474   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2475         (plus:<VWIDE>
2476           (mult:<VWIDE>
2477             (ANY_EXTEND:<VWIDE>
2478               (vec_select:<VHALF>
2479                 (match_operand:VQ_HSI 2 "register_operand" "w")
2480                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2481             (vec_duplicate:<VWIDE>
2482               (ANY_EXTEND:<VWIDE_S>
2483                 (vec_select:<VEL>
2484                   (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2485                   (parallel [(match_operand:SI 5 "immediate_operand" "i")])))))
2486           (match_operand:<VWIDE> 1 "register_operand" "0")))]
2487   "TARGET_SIMD"
2488   {
2489     operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2490     return "<su>mlal2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2491   }
2492   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2493 )
2494
2495 (define_expand "aarch64_<su>mlal_hi_laneq<mode>"
2496   [(match_operand:<VWIDE> 0 "register_operand")
2497    (match_operand:<VWIDE> 1 "register_operand")
2498    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2499    (match_operand:<VCONQ> 3 "register_operand")
2500    (match_operand:SI 4 "immediate_operand")]
2501   "TARGET_SIMD"
2502 {
2503   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2504   emit_insn (gen_aarch64_<su>mlal_hi_laneq<mode>_insn (operands[0],
2505              operands[1], operands[2], p, operands[3], operands[4]));
2506   DONE;
2507 }
2508 )
2509
2510 (define_insn "aarch64_vec_<su>mlsl_lane<Qlane>"
2511   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2512    (minus:<VWIDE>
2513      (match_operand:<VWIDE> 1 "register_operand" "0")
2514      (mult:<VWIDE>
2515        (ANY_EXTEND:<VWIDE>
2516          (match_operand:<VCOND> 2 "register_operand" "w"))
2517        (vec_duplicate:<VWIDE>
2518          (ANY_EXTEND:<VWIDE_S>
2519            (vec_select:<VEL>
2520              (match_operand:VDQHS 3 "register_operand" "<vwx>")
2521              (parallel [(match_operand:SI 4 "immediate_operand" "i")])))))))]
2522   "TARGET_SIMD"
2523   {
2524     operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
2525     return "<su>mlsl\\t%0.<Vwtype>, %2.<Vcondtype>, %3.<Vetype>[%4]";
2526   }
2527   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2528 )
2529
2530 (define_insn "aarch64_<su>mlsl_hi_lane<mode>_insn"
2531   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2532         (minus:<VWIDE>
2533           (match_operand:<VWIDE> 1 "register_operand" "0")
2534           (mult:<VWIDE>
2535             (ANY_EXTEND:<VWIDE>
2536               (vec_select:<VHALF>
2537                 (match_operand:VQ_HSI 2 "register_operand" "w")
2538                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2539             (vec_duplicate:<VWIDE>
2540               (ANY_EXTEND:<VWIDE_S>
2541                 (vec_select:<VEL>
2542                   (match_operand:<VCOND> 4 "register_operand" "<vwx>")
2543                   (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2544           )))]
2545   "TARGET_SIMD"
2546   {
2547     operands[5] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[5]));
2548     return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2549   }
2550   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2551 )
2552
2553 (define_expand "aarch64_<su>mlsl_hi_lane<mode>"
2554   [(match_operand:<VWIDE> 0 "register_operand")
2555    (match_operand:<VWIDE> 1 "register_operand")
2556    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2557    (match_operand:<VCOND> 3 "register_operand")
2558    (match_operand:SI 4 "immediate_operand")]
2559   "TARGET_SIMD"
2560 {
2561   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2562   emit_insn (gen_aarch64_<su>mlsl_hi_lane<mode>_insn (operands[0],
2563              operands[1], operands[2], p, operands[3], operands[4]));
2564   DONE;
2565 }
2566 )
2567
2568 (define_insn "aarch64_<su>mlsl_hi_laneq<mode>_insn"
2569   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2570         (minus:<VWIDE>
2571           (match_operand:<VWIDE> 1 "register_operand" "0")
2572           (mult:<VWIDE>
2573             (ANY_EXTEND:<VWIDE>
2574               (vec_select:<VHALF>
2575                 (match_operand:VQ_HSI 2 "register_operand" "w")
2576                 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
2577             (vec_duplicate:<VWIDE>
2578               (ANY_EXTEND:<VWIDE_S>
2579                 (vec_select:<VEL>
2580                   (match_operand:<VCONQ> 4 "register_operand" "<vwx>")
2581                   (parallel [(match_operand:SI 5 "immediate_operand" "i")]))))
2582           )))]
2583   "TARGET_SIMD"
2584   {
2585     operands[5] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[5]));
2586     return "<su>mlsl2\\t%0.<Vwtype>, %2.<Vtype>, %4.<Vetype>[%5]";
2587   }
2588   [(set_attr "type" "neon_mla_<Vetype>_scalar_long")]
2589 )
2590
2591 (define_expand "aarch64_<su>mlsl_hi_laneq<mode>"
2592   [(match_operand:<VWIDE> 0 "register_operand")
2593    (match_operand:<VWIDE> 1 "register_operand")
2594    (ANY_EXTEND:<VWIDE>(match_operand:VQ_HSI 2 "register_operand"))
2595    (match_operand:<VCONQ> 3 "register_operand")
2596    (match_operand:SI 4 "immediate_operand")]
2597   "TARGET_SIMD"
2598 {
2599   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2600   emit_insn (gen_aarch64_<su>mlsl_hi_laneq<mode>_insn (operands[0],
2601              operands[1], operands[2], p, operands[3], operands[4]));
2602   DONE;
2603 }
2604 )
2605
2606 ;; FP vector operations.
2607 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
2608 ;; double-precision (64-bit) floating-point data types and arithmetic as
2609 ;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
2610 ;; without the need for -ffast-math or -funsafe-math-optimizations.
2611 ;;
2612 ;; Floating-point operations can raise an exception.  Vectorizing such
2613 ;; operations are safe because of reasons explained below.
2614 ;;
2615 ;; ARMv8 permits an extension to enable trapped floating-point
2616 ;; exception handling, however this is an optional feature.  In the
2617 ;; event of a floating-point exception being raised by vectorised
2618 ;; code then:
2619 ;; 1.  If trapped floating-point exceptions are available, then a trap
2620 ;;     will be taken when any lane raises an enabled exception.  A trap
2621 ;;     handler may determine which lane raised the exception.
2622 ;; 2.  Alternatively a sticky exception flag is set in the
2623 ;;     floating-point status register (FPSR).  Software may explicitly
2624 ;;     test the exception flags, in which case the tests will either
2625 ;;     prevent vectorisation, allowing precise identification of the
2626 ;;     failing operation, or if tested outside of vectorisable regions
2627 ;;     then the specific operation and lane are not of interest.
2628
2629 ;; FP arithmetic operations.
2630
2631 (define_insn "add<mode>3<vczle><vczbe>"
2632  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2633        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2634                    (match_operand:VHSDF 2 "register_operand" "w")))]
2635  "TARGET_SIMD"
2636  "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2637   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2638 )
2639
2640 (define_insn "sub<mode>3<vczle><vczbe>"
2641  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2642        (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2643                     (match_operand:VHSDF 2 "register_operand" "w")))]
2644  "TARGET_SIMD"
2645  "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2646   [(set_attr "type" "neon_fp_addsub_<stype><q>")]
2647 )
2648
2649 (define_insn "mul<mode>3<vczle><vczbe>"
2650  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2651        (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2652                    (match_operand:VHSDF 2 "register_operand" "w")))]
2653  "TARGET_SIMD"
2654  "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2655   [(set_attr "type" "neon_fp_mul_<stype><q>")]
2656 )
2657
2658 (define_expand "div<mode>3"
2659  [(set (match_operand:VHSDF 0 "register_operand")
2660        (div:VHSDF (match_operand:VHSDF 1 "register_operand")
2661                   (match_operand:VHSDF 2 "register_operand")))]
2662  "TARGET_SIMD"
2663 {
2664   if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
2665     DONE;
2666
2667   operands[1] = force_reg (<MODE>mode, operands[1]);
2668 })
2669
2670 (define_insn "*div<mode>3<vczle><vczbe>"
2671  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2672        (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2673                  (match_operand:VHSDF 2 "register_operand" "w")))]
2674  "TARGET_SIMD"
2675  "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2676   [(set_attr "type" "neon_fp_div_<stype><q>")]
2677 )
2678
2679 (define_insn "neg<mode>2<vczle><vczbe>"
2680  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2681        (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2682  "TARGET_SIMD"
2683  "fneg\\t%0.<Vtype>, %1.<Vtype>"
2684   [(set_attr "type" "neon_fp_neg_<stype><q>")]
2685 )
2686
2687 (define_insn "aarch64_fnegv2di2<vczle><vczbe>"
2688  [(set (match_operand:V2DI 0 "register_operand" "=w")
2689        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
2690                       UNSPEC_FNEG))]
2691  "TARGET_SIMD"
2692  "fneg\\t%0.2d, %1.2d"
2693   [(set_attr "type" "neon_fp_neg_d")]
2694 )
2695
2696 (define_insn "abs<mode>2<vczle><vczbe>"
2697  [(set (match_operand:VHSDF 0 "register_operand" "=w")
2698        (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
2699  "TARGET_SIMD"
2700  "fabs\\t%0.<Vtype>, %1.<Vtype>"
2701   [(set_attr "type" "neon_fp_abs_<stype><q>")]
2702 )
2703
2704 (define_expand "aarch64_float_mla<mode>"
2705   [(set (match_operand:VDQF_DF 0 "register_operand")
2706         (plus:VDQF_DF
2707           (mult:VDQF_DF
2708             (match_operand:VDQF_DF 2 "register_operand")
2709             (match_operand:VDQF_DF 3 "register_operand"))
2710           (match_operand:VDQF_DF 1 "register_operand")))]
2711   "TARGET_SIMD"
2712   {
2713     rtx scratch = gen_reg_rtx (<MODE>mode);
2714     emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2715     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2716     DONE;
2717   }
2718 )
2719
2720 (define_expand "aarch64_float_mls<mode>"
2721   [(set (match_operand:VDQF_DF 0 "register_operand")
2722         (minus:VDQF_DF
2723           (match_operand:VDQF_DF 1 "register_operand")
2724           (mult:VDQF_DF
2725             (match_operand:VDQF_DF 2 "register_operand")
2726             (match_operand:VDQF_DF 3 "register_operand"))))]
2727   "TARGET_SIMD"
2728   {
2729     rtx scratch = gen_reg_rtx (<MODE>mode);
2730     emit_insn (gen_mul<mode>3 (scratch, operands[2], operands[3]));
2731     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2732     DONE;
2733   }
2734 )
2735
2736 (define_expand "aarch64_float_mla_n<mode>"
2737   [(set (match_operand:VDQSF 0 "register_operand")
2738         (plus:VDQSF
2739           (mult:VDQSF
2740             (vec_duplicate:VDQSF
2741               (match_operand:<VEL> 3 "register_operand"))
2742             (match_operand:VDQSF 2 "register_operand"))
2743           (match_operand:VDQSF 1 "register_operand")))]
2744   "TARGET_SIMD"
2745   {
2746     rtx scratch = gen_reg_rtx (<MODE>mode);
2747     emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2748     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2749     DONE;
2750   }
2751 )
2752
2753 (define_expand "aarch64_float_mls_n<mode>"
2754   [(set (match_operand:VDQSF 0 "register_operand")
2755         (minus:VDQSF
2756           (match_operand:VDQSF 1 "register_operand")
2757           (mult:VDQSF
2758             (vec_duplicate:VDQSF
2759               (match_operand:<VEL> 3 "register_operand"))
2760             (match_operand:VDQSF 2 "register_operand"))))]
2761   "TARGET_SIMD"
2762   {
2763     rtx scratch = gen_reg_rtx (<MODE>mode);
2764     emit_insn (gen_mul_n<mode>3 (scratch, operands[2], operands[3]));
2765     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2766     DONE;
2767   }
2768 )
2769
2770 (define_expand "aarch64_float_mla_lane<mode>"
2771   [(set (match_operand:VDQSF 0 "register_operand")
2772         (plus:VDQSF
2773           (mult:VDQSF
2774             (vec_duplicate:VDQSF
2775               (vec_select:<VEL>
2776                 (match_operand:V2SF 3 "register_operand")
2777                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2778             (match_operand:VDQSF 2 "register_operand"))
2779           (match_operand:VDQSF 1 "register_operand")))]
2780   "TARGET_SIMD"
2781   {
2782     rtx scratch = gen_reg_rtx (<MODE>mode);
2783     emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2784                                     operands[3], operands[4]));
2785     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2786     DONE;
2787   }
2788 )
2789
2790 (define_expand "aarch64_float_mls_lane<mode>"
2791   [(set (match_operand:VDQSF 0 "register_operand")
2792         (minus:VDQSF
2793           (match_operand:VDQSF 1 "register_operand")
2794           (mult:VDQSF
2795             (vec_duplicate:VDQSF
2796               (vec_select:<VEL>
2797                 (match_operand:V2SF 3 "register_operand")
2798                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2799             (match_operand:VDQSF 2 "register_operand"))))]
2800   "TARGET_SIMD"
2801   {
2802     rtx scratch = gen_reg_rtx (<MODE>mode);
2803     emit_insn (gen_mul_lane<mode>3 (scratch, operands[2],
2804                                     operands[3], operands[4]));
2805     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2806     DONE;
2807   }
2808 )
2809
2810 (define_expand "aarch64_float_mla_laneq<mode>"
2811   [(set (match_operand:VDQSF 0 "register_operand")
2812         (plus:VDQSF
2813           (mult:VDQSF
2814             (vec_duplicate:VDQSF
2815               (vec_select:<VEL>
2816                 (match_operand:V4SF 3 "register_operand")
2817                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2818             (match_operand:VDQSF 2 "register_operand"))
2819           (match_operand:VDQSF 1 "register_operand")))]
2820   "TARGET_SIMD"
2821   {
2822     rtx scratch = gen_reg_rtx (<MODE>mode);
2823     emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2824                                      operands[3], operands[4]));
2825     emit_insn (gen_add<mode>3 (operands[0], operands[1], scratch));
2826     DONE;
2827   }
2828 )
2829
2830 (define_expand "aarch64_float_mls_laneq<mode>"
2831   [(set (match_operand:VDQSF 0 "register_operand")
2832         (minus:VDQSF
2833           (match_operand:VDQSF 1 "register_operand")
2834           (mult:VDQSF
2835             (vec_duplicate:VDQSF
2836               (vec_select:<VEL>
2837                 (match_operand:V4SF 3 "register_operand")
2838                 (parallel [(match_operand:SI 4 "immediate_operand")])))
2839             (match_operand:VDQSF 2 "register_operand"))))]
2840   "TARGET_SIMD"
2841   {
2842     rtx scratch = gen_reg_rtx (<MODE>mode);
2843     emit_insn (gen_mul_laneq<mode>3 (scratch, operands[2],
2844                                      operands[3], operands[4]));
2845     emit_insn (gen_sub<mode>3 (operands[0], operands[1], scratch));
2846     DONE;
2847   }
2848 )
2849
2850 (define_insn "fma<mode>4<vczle><vczbe>"
2851   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2852        (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2853                   (match_operand:VHSDF 2 "register_operand" "w")
2854                   (match_operand:VHSDF 3 "register_operand" "0")))]
2855   "TARGET_SIMD"
2856  "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2857   [(set_attr "type" "neon_fp_mla_<stype><q>")]
2858 )
2859
2860 (define_insn "*aarch64_fma4_elt<mode><vczle><vczbe>"
2861   [(set (match_operand:VDQF 0 "register_operand" "=w")
2862     (fma:VDQF
2863       (vec_duplicate:VDQF
2864         (vec_select:<VEL>
2865           (match_operand:VDQF 1 "register_operand" "<h_con>")
2866           (parallel [(match_operand:SI 2 "immediate_operand")])))
2867       (match_operand:VDQF 3 "register_operand" "w")
2868       (match_operand:VDQF 4 "register_operand" "0")))]
2869   "TARGET_SIMD"
2870   {
2871     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2872     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2873   }
2874   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2875 )
2876
2877 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2878   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2879     (fma:VDQSF
2880       (vec_duplicate:VDQSF
2881         (vec_select:<VEL>
2882           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2883           (parallel [(match_operand:SI 2 "immediate_operand")])))
2884       (match_operand:VDQSF 3 "register_operand" "w")
2885       (match_operand:VDQSF 4 "register_operand" "0")))]
2886   "TARGET_SIMD"
2887   {
2888     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2889     return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2890   }
2891   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2892 )
2893
2894 (define_insn "*aarch64_fma4_elt_from_dup<mode><vczle><vczbe>"
2895   [(set (match_operand:VMUL 0 "register_operand" "=w")
2896     (fma:VMUL
2897       (vec_duplicate:VMUL
2898           (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2899       (match_operand:VMUL 2 "register_operand" "w")
2900       (match_operand:VMUL 3 "register_operand" "0")))]
2901   "TARGET_SIMD"
2902   "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2903   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2904 )
2905
2906 (define_insn "*aarch64_fma4_elt_to_64v2df"
2907   [(set (match_operand:DF 0 "register_operand" "=w")
2908     (fma:DF
2909         (vec_select:DF
2910           (match_operand:V2DF 1 "register_operand" "w")
2911           (parallel [(match_operand:SI 2 "immediate_operand")]))
2912       (match_operand:DF 3 "register_operand" "w")
2913       (match_operand:DF 4 "register_operand" "0")))]
2914   "TARGET_SIMD"
2915   {
2916     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2917     return "fmla\\t%0.2d, %3.2d, %1.d[%2]";
2918   }
2919   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2920 )
2921
2922 (define_insn "fnma<mode>4<vczle><vczbe>"
2923   [(set (match_operand:VHSDF 0 "register_operand" "=w")
2924         (fma:VHSDF
2925           (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
2926           (match_operand:VHSDF 2 "register_operand" "w")
2927           (match_operand:VHSDF 3 "register_operand" "0")))]
2928   "TARGET_SIMD"
2929   "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2930   [(set_attr "type" "neon_fp_mla_<stype><q>")]
2931 )
2932
2933 (define_insn "*aarch64_fnma4_elt<mode><vczle><vczbe>"
2934   [(set (match_operand:VDQF 0 "register_operand" "=w")
2935     (fma:VDQF
2936       (neg:VDQF
2937         (match_operand:VDQF 3 "register_operand" "w"))
2938       (vec_duplicate:VDQF
2939         (vec_select:<VEL>
2940           (match_operand:VDQF 1 "register_operand" "<h_con>")
2941           (parallel [(match_operand:SI 2 "immediate_operand")])))
2942       (match_operand:VDQF 4 "register_operand" "0")))]
2943   "TARGET_SIMD"
2944   {
2945     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2946     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2947   }
2948   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2949 )
2950
2951 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode><vczle><vczbe>"
2952   [(set (match_operand:VDQSF 0 "register_operand" "=w")
2953     (fma:VDQSF
2954       (neg:VDQSF
2955         (match_operand:VDQSF 3 "register_operand" "w"))
2956       (vec_duplicate:VDQSF
2957         (vec_select:<VEL>
2958           (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
2959           (parallel [(match_operand:SI 2 "immediate_operand")])))
2960       (match_operand:VDQSF 4 "register_operand" "0")))]
2961   "TARGET_SIMD"
2962   {
2963     operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
2964     return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
2965   }
2966   [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
2967 )
2968
2969 (define_insn "*aarch64_fnma4_elt_from_dup<mode><vczle><vczbe>"
2970   [(set (match_operand:VMUL 0 "register_operand" "=w")
2971     (fma:VMUL
2972       (neg:VMUL
2973         (match_operand:VMUL 2 "register_operand" "w"))
2974       (vec_duplicate:VMUL
2975         (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2976       (match_operand:VMUL 3 "register_operand" "0")))]
2977   "TARGET_SIMD"
2978   "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2979   [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2980 )
2981
2982 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2983   [(set (match_operand:DF 0 "register_operand" "=w")
2984     (fma:DF
2985       (vec_select:DF
2986         (match_operand:V2DF 1 "register_operand" "w")
2987         (parallel [(match_operand:SI 2 "immediate_operand")]))
2988       (neg:DF
2989         (match_operand:DF 3 "register_operand" "w"))
2990       (match_operand:DF 4 "register_operand" "0")))]
2991   "TARGET_SIMD"
2992   {
2993     operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2994     return "fmls\\t%0.2d, %3.2d, %1.d[%2]";
2995   }
2996   [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2997 )
2998
2999 ;; Vector versions of the floating-point frint patterns.
3000 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
3001 (define_insn "<frint_pattern><mode>2<vczle><vczbe>"
3002   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3003         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3004                        FRINT))]
3005   "TARGET_SIMD"
3006   "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
3007   [(set_attr "type" "neon_fp_round_<stype><q>")]
3008 )
3009
3010 ;; Vector versions of the fcvt standard patterns.
3011 ;; Expands to lbtrunc, lround, lceil, lfloor
3012 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
3013   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3014         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3015                                [(match_operand:VHSDF 1 "register_operand" "w")]
3016                                FCVT)))]
3017   "TARGET_SIMD"
3018   "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
3019   [(set_attr "type" "neon_fp_to_int_<stype><q>")]
3020 )
3021
3022 ;; HF Scalar variants of related SIMD instructions.
3023 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
3024   [(set (match_operand:HI 0 "register_operand" "=w")
3025         (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
3026                       FCVT)))]
3027   "TARGET_SIMD_F16INST"
3028   "fcvt<frint_suffix><su>\t%h0, %h1"
3029   [(set_attr "type" "neon_fp_to_int_s")]
3030 )
3031
3032 (define_insn "<optab>_trunchfhi2"
3033   [(set (match_operand:HI 0 "register_operand" "=w")
3034         (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
3035   "TARGET_SIMD_F16INST"
3036   "fcvtz<su>\t%h0, %h1"
3037   [(set_attr "type" "neon_fp_to_int_s")]
3038 )
3039
3040 (define_insn "<optab>hihf2"
3041   [(set (match_operand:HF 0 "register_operand" "=w")
3042         (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
3043   "TARGET_SIMD_F16INST"
3044   "<su_optab>cvtf\t%h0, %h1"
3045   [(set_attr "type" "neon_int_to_fp_s")]
3046 )
3047
3048 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
3049   [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
3050         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3051                                [(mult:VDQF
3052          (match_operand:VDQF 1 "register_operand" "w")
3053          (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
3054                                UNSPEC_FRINTZ)))]
3055   "TARGET_SIMD
3056    && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
3057                 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
3058   {
3059     int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
3060     char buf[64];
3061     snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
3062     output_asm_insn (buf, operands);
3063     return "";
3064   }
3065   [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
3066 )
3067
3068 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
3069   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3070         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3071                                [(match_operand:VHSDF 1 "register_operand")]
3072                                 UNSPEC_FRINTZ)))]
3073   "TARGET_SIMD"
3074   {})
3075
3076 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
3077   [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
3078         (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
3079                                [(match_operand:VHSDF 1 "register_operand")]
3080                                 UNSPEC_FRINTZ)))]
3081   "TARGET_SIMD"
3082   {})
3083
3084 (define_expand "ftrunc<VHSDF:mode>2"
3085   [(set (match_operand:VHSDF 0 "register_operand")
3086         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
3087                        UNSPEC_FRINTZ))]
3088   "TARGET_SIMD"
3089   {})
3090
3091 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
3092   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3093         (FLOATUORS:VHSDF
3094           (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
3095   "TARGET_SIMD"
3096   "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
3097   [(set_attr "type" "neon_int_to_fp_<stype><q>")]
3098 )
3099
3100 ;; Conversions between vectors of floats and doubles.
3101 ;; Contains a mix of patterns to match standard pattern names
3102 ;; and those for intrinsics.
3103
3104 ;; Float widening operations.
3105
3106 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
3107   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3108         (float_extend:<VWIDE> (vec_select:<VHALF>
3109                                (match_operand:VQ_HSF 1 "register_operand" "w")
3110                                (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
3111                             )))]
3112   "TARGET_SIMD"
3113   "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
3114   [(set_attr "type" "neon_fp_cvt_widen_s")]
3115 )
3116
3117 ;; Convert between fixed-point and floating-point (vector modes)
3118
3119 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
3120   [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
3121         (unspec:<VHSDF:FCVT_TARGET>
3122           [(match_operand:VHSDF 1 "register_operand" "w")
3123            (match_operand:SI 2 "immediate_operand" "i")]
3124          FCVT_F2FIXED))]
3125   "TARGET_SIMD"
3126   "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3127   [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
3128 )
3129
3130 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
3131   [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
3132         (unspec:<VDQ_HSDI:FCVT_TARGET>
3133           [(match_operand:VDQ_HSDI 1 "register_operand" "w")
3134            (match_operand:SI 2 "immediate_operand" "i")]
3135          FCVT_FIXED2F))]
3136   "TARGET_SIMD"
3137   "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
3138   [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
3139 )
3140
3141 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
3142 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
3143 ;; the meaning of HI and LO changes depending on the target endianness.
3144 ;; While elsewhere we map the higher numbered elements of a vector to
3145 ;; the lower architectural lanes of the vector, for these patterns we want
3146 ;; to always treat "hi" as referring to the higher architectural lanes.
3147 ;; Consequently, while the patterns below look inconsistent with our
3148 ;; other big-endian patterns their behavior is as required.
3149
3150 (define_expand "vec_unpacks_lo_<mode>"
3151   [(match_operand:<VWIDE> 0 "register_operand")
3152    (match_operand:VQ_HSF 1 "register_operand")]
3153   "TARGET_SIMD"
3154   {
3155     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3156     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3157                                                        operands[1], p));
3158     DONE;
3159   }
3160 )
3161
3162 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
3163   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3164         (float_extend:<VWIDE> (vec_select:<VHALF>
3165                                (match_operand:VQ_HSF 1 "register_operand" "w")
3166                                (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
3167                             )))]
3168   "TARGET_SIMD"
3169   "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
3170   [(set_attr "type" "neon_fp_cvt_widen_s")]
3171 )
3172
3173 (define_expand "vec_unpacks_hi_<mode>"
3174   [(match_operand:<VWIDE> 0 "register_operand")
3175    (match_operand:VQ_HSF 1 "register_operand")]
3176   "TARGET_SIMD"
3177   {
3178     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179     emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
3180                                                        operands[1], p));
3181     DONE;
3182   }
3183 )
3184 (define_insn "extend<mode><Vwide>2"
3185   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3186         (float_extend:<VWIDE>
3187           (match_operand:VDF 1 "register_operand" "w")))]
3188   "TARGET_SIMD"
3189   "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
3190   [(set_attr "type" "neon_fp_cvt_widen_s")]
3191 )
3192
3193 ;; Float narrowing operations.
3194
3195 (define_insn "aarch64_float_trunc_rodd_df"
3196   [(set (match_operand:SF 0 "register_operand" "=w")
3197         (unspec:SF [(match_operand:DF 1 "register_operand" "w")]
3198                 UNSPEC_FCVTXN))]
3199   "TARGET_SIMD"
3200   "fcvtxn\\t%s0, %d1"
3201   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3202 )
3203
3204 (define_insn "aarch64_float_trunc_rodd_lo_v2sf"
3205   [(set (match_operand:V2SF 0 "register_operand" "=w")
3206         (unspec:V2SF [(match_operand:V2DF 1 "register_operand" "w")]
3207                 UNSPEC_FCVTXN))]
3208   "TARGET_SIMD"
3209   "fcvtxn\\t%0.2s, %1.2d"
3210   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3211 )
3212
3213 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_le"
3214   [(set (match_operand:V4SF 0 "register_operand" "=w")
3215         (vec_concat:V4SF
3216           (match_operand:V2SF 1 "register_operand" "0")
3217           (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3218                 UNSPEC_FCVTXN)))]
3219   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3220   "fcvtxn2\\t%0.4s, %2.2d"
3221   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3222 )
3223
3224 (define_insn "aarch64_float_trunc_rodd_hi_v4sf_be"
3225   [(set (match_operand:V4SF 0 "register_operand" "=w")
3226         (vec_concat:V4SF
3227           (unspec:V2SF [(match_operand:V2DF 2 "register_operand" "w")]
3228                 UNSPEC_FCVTXN)
3229           (match_operand:V2SF 1 "register_operand" "0")))]
3230   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3231   "fcvtxn2\\t%0.4s, %2.2d"
3232   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3233 )
3234
3235 (define_expand "aarch64_float_trunc_rodd_hi_v4sf"
3236   [(match_operand:V4SF 0 "register_operand")
3237    (match_operand:V2SF 1 "register_operand")
3238    (match_operand:V2DF 2 "register_operand")]
3239   "TARGET_SIMD"
3240 {
3241   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3242                              ? gen_aarch64_float_trunc_rodd_hi_v4sf_be
3243                              : gen_aarch64_float_trunc_rodd_hi_v4sf_le;
3244   emit_insn (gen (operands[0], operands[1], operands[2]));
3245   DONE;
3246 }
3247 )
3248
3249 (define_insn "trunc<Vwide><mode>2<vczle><vczbe>"
3250   [(set (match_operand:VDF 0 "register_operand" "=w")
3251       (float_truncate:VDF
3252         (match_operand:<VWIDE> 1 "register_operand" "w")))]
3253   "TARGET_SIMD"
3254   "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
3255   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3256 )
3257
3258 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
3259   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3260     (vec_concat:<VDBL>
3261       (match_operand:VDF 1 "register_operand" "0")
3262       (float_truncate:VDF
3263         (match_operand:<VWIDE> 2 "register_operand" "w"))))]
3264   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3265   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3266   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3267 )
3268
3269 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
3270   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3271     (vec_concat:<VDBL>
3272       (float_truncate:VDF
3273         (match_operand:<VWIDE> 2 "register_operand" "w"))
3274       (match_operand:VDF 1 "register_operand" "0")))]
3275   "TARGET_SIMD && BYTES_BIG_ENDIAN"
3276   "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
3277   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
3278 )
3279
3280 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
3281   [(match_operand:<VDBL> 0 "register_operand")
3282    (match_operand:VDF 1 "register_operand")
3283    (match_operand:<VWIDE> 2 "register_operand")]
3284   "TARGET_SIMD"
3285 {
3286   rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
3287                              ? gen_aarch64_float_truncate_hi_<Vdbl>_be
3288                              : gen_aarch64_float_truncate_hi_<Vdbl>_le;
3289   emit_insn (gen (operands[0], operands[1], operands[2]));
3290   DONE;
3291 }
3292 )
3293
3294 (define_expand "vec_pack_trunc_v2df"
3295   [(set (match_operand:V4SF 0 "register_operand")
3296       (vec_concat:V4SF
3297         (float_truncate:V2SF
3298             (match_operand:V2DF 1 "register_operand"))
3299         (float_truncate:V2SF
3300             (match_operand:V2DF 2 "register_operand"))
3301           ))]
3302   "TARGET_SIMD"
3303   {
3304     rtx tmp = gen_reg_rtx (V2SFmode);
3305     int lo = BYTES_BIG_ENDIAN ? 2 : 1;
3306     int hi = BYTES_BIG_ENDIAN ? 1 : 2;
3307
3308     emit_insn (gen_truncv2dfv2sf2 (tmp, operands[lo]));
3309     emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
3310                                                    tmp, operands[hi]));
3311     DONE;
3312   }
3313 )
3314
3315 (define_expand "vec_pack_trunc_df"
3316   [(set (match_operand:V2SF 0 "register_operand")
3317         (vec_concat:V2SF
3318           (float_truncate:SF (match_operand:DF 1 "general_operand"))
3319           (float_truncate:SF (match_operand:DF 2 "general_operand"))))]
3320   "TARGET_SIMD"
3321   {
3322     rtx tmp = gen_reg_rtx (V2SFmode);
3323     emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2]));
3324     emit_insn (gen_truncv2dfv2sf2 (operands[0], tmp));
3325     DONE;
3326   }
3327 )
3328
3329 ;; FP Max/Min
3330 ;; Max/Min are introduced by idiom recognition by GCC's mid-end.  An
3331 ;; expression like:
3332 ;;      a = (b < c) ? b : c;
3333 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
3334 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
3335 ;; -ffast-math.
3336 ;;
3337 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
3338 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
3339 ;; operand will be returned when both operands are zero (i.e. they may not
3340 ;; honour signed zeroes), or when either operand is NaN.  Therefore GCC
3341 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
3342 ;; NaNs.
3343
3344 (define_insn "<su><maxmin><mode>3"
3345   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3346         (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
3347                        (match_operand:VHSDF 2 "register_operand" "w")))]
3348   "TARGET_SIMD"
3349   "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3350   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3351 )
3352
3353 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
3354 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
3355 ;; which implement the IEEE fmax ()/fmin () functions.
3356 (define_insn "<fmaxmin><mode>3<vczle><vczbe>"
3357   [(set (match_operand:VHSDF 0 "register_operand" "=w")
3358        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3359                       (match_operand:VHSDF 2 "register_operand" "w")]
3360                       FMAXMIN_UNS))]
3361   "TARGET_SIMD"
3362   "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3363   [(set_attr "type" "neon_fp_minmax_<stype><q>")]
3364 )
3365
3366 ;; 'across lanes' add.
3367
3368 (define_insn "aarch64_faddp<mode><vczle><vczbe>"
3369  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3370        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
3371                       (match_operand:VHSDF 2 "register_operand" "w")]
3372         UNSPEC_FADDV))]
3373  "TARGET_SIMD"
3374  "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3375   [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
3376 )
3377
3378 (define_insn "reduc_plus_scal_<mode>"
3379  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3380        (unspec:<VEL> [(match_operand:VDQV 1 "register_operand" "w")]
3381                     UNSPEC_ADDV))]
3382  "TARGET_SIMD"
3383  "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
3384   [(set_attr "type" "neon_reduc_add<q>")]
3385 )
3386
3387 (define_insn "reduc_plus_scal_v2si"
3388  [(set (match_operand:SI 0 "register_operand" "=w")
3389        (unspec:SI [(match_operand:V2SI 1 "register_operand" "w")]
3390                     UNSPEC_ADDV))]
3391  "TARGET_SIMD"
3392  "addp\\t%0.2s, %1.2s, %1.2s"
3393   [(set_attr "type" "neon_reduc_add")]
3394 )
3395
3396 ;; ADDV with result zero-extended to SI/DImode (for popcount).
3397 (define_insn "aarch64_zero_extend<GPI:mode>_reduc_plus_<VDQV_E:mode>"
3398  [(set (match_operand:GPI 0 "register_operand" "=w")
3399        (zero_extend:GPI
3400         (unspec:<VDQV_E:VEL> [(match_operand:VDQV_E 1 "register_operand" "w")]
3401                              UNSPEC_ADDV)))]
3402  "TARGET_SIMD"
3403  "add<VDQV_E:vp>\\t%<VDQV_E:Vetype>0, %1.<VDQV_E:Vtype>"
3404   [(set_attr "type" "neon_reduc_add<VDQV_E:q>")]
3405 )
3406
3407 (define_insn "reduc_plus_scal_<mode>"
3408  [(set (match_operand:<VEL> 0 "register_operand" "=w")
3409        (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
3410                    UNSPEC_FADDV))]
3411  "TARGET_SIMD"
3412  "faddp\\t%<Vetype>0, %1.<Vtype>"
3413   [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
3414 )
3415
3416 (define_expand "reduc_plus_scal_v4sf"
3417  [(set (match_operand:SF 0 "register_operand")
3418        (unspec:SF [(match_operand:V4SF 1 "register_operand")]
3419                     UNSPEC_FADDV))]
3420  "TARGET_SIMD"
3421 {
3422   rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
3423   rtx scratch = gen_reg_rtx (V4SFmode);
3424   emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
3425   emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
3426   emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
3427   DONE;
3428 })
3429
3430 ;; SADDLV and UADDLV can be expressed as an ADDV instruction that first
3431 ;; sign or zero-extends its elements.
3432 (define_insn "aarch64_<su>addlv<mode>"
3433  [(set (match_operand:<VWIDE_S> 0 "register_operand" "=w")
3434        (unspec:<VWIDE_S>
3435          [(ANY_EXTEND:<V2XWIDE>
3436             (match_operand:VDQV_L 1 "register_operand" "w"))]
3437          UNSPEC_ADDV))]
3438  "TARGET_SIMD"
3439  "<su>addl<vp>\\t%<Vwstype>0<Vwsuf>, %1.<Vtype>"
3440   [(set_attr "type" "neon_reduc_add<q>")]
3441 )
3442
3443 ;; An ADDV over a vector PLUS of elements extracted and widened all from the
3444 ;; same vector is the same as an [SU]ADDLV above, so long as all the elements
3445 ;; of that vector are used.  We can greatly simplify the RTL expression using
3446 ;; this splitter.
3447 (define_insn_and_split "*aarch64_<su>addlv<mode>_reduction"
3448  [(set (match_operand:<VWIDE_S> 0 "register_operand")
3449        (unspec:<VWIDE_S>
3450          [(plus:<VDBLW>
3451             (vec_select:<VDBLW>
3452               (ANY_EXTEND:<V2XWIDE>
3453                 (match_operand:VDQV_L 1 "register_operand"))
3454               (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3455             (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3456               (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half")))]
3457          UNSPEC_ADDV))]
3458  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3459  "#"
3460  "&& 1"
3461   [(set (match_dup 0)
3462        (unspec:<VWIDE_S>
3463          [(ANY_EXTEND:<V2XWIDE>
3464             (match_dup 1))]
3465          UNSPEC_ADDV))]
3466   {}
3467 )
3468
3469 ;; Similar to the above but for two-step zero-widening reductions.
3470 ;; We can push the outer zero_extend outside the ADDV unspec and make
3471 ;; use of the implicit high-part zeroing semantics of UADDLV to do it all
3472 ;; in a single instruction.
3473 (define_insn_and_split "*aarch64_uaddlv<mode>_reduction_2"
3474  [(set (match_operand:<VWIDE2X_S> 0 "register_operand" "=w")
3475        (unspec:<VWIDE2X_S>
3476          [(zero_extend:<VQUADW>
3477             (plus:<VDBLW>
3478               (vec_select:<VDBLW>
3479                 (zero_extend:<V2XWIDE>
3480                   (match_operand:VDQQH 1 "register_operand" "w"))
3481                 (match_operand:<V2XWIDE> 2 "vect_par_cnst_select_half"))
3482               (vec_select:<VDBLW> (zero_extend:<V2XWIDE> (match_dup 1))
3483                 (match_operand:<V2XWIDE> 3 "vect_par_cnst_select_half"))))]
3484          UNSPEC_ADDV))]
3485  "TARGET_SIMD && !aarch64_pars_overlap_p (operands[2], operands[3])"
3486  "#"
3487  "&& 1"
3488   [(set (match_dup 0)
3489         (zero_extend:<VWIDE2X_S>
3490           (unspec:<VWIDE_S>
3491             [(zero_extend:<V2XWIDE>
3492                (match_dup 1))]
3493             UNSPEC_ADDV)))]
3494   {}
3495 )
3496
3497 ;; Zero-extending version of the above.  As these intrinsics produce a scalar
3498 ;; value that may be used by further intrinsics we want to avoid moving the
3499 ;; result into GP regs to do a zero-extension that ADDLV/ADDLP gives for free.
3500
3501 (define_insn "*aarch64_<su>addlv<VDQV_L:mode>_ze<GPI:mode>"
3502  [(set (match_operand:GPI 0 "register_operand" "=w")
3503        (zero_extend:GPI
3504          (unspec:<VWIDE_S>
3505            [(ANY_EXTEND:<VDQV_L:V2XWIDE>
3506               (match_operand:VDQV_L 1 "register_operand" "w"))]
3507          UNSPEC_ADDV)))]
3508  "TARGET_SIMD
3509   && (GET_MODE_SIZE (<GPI:MODE>mode) > GET_MODE_SIZE (<VWIDE_S>mode))"
3510  "<su>addl<VDQV_L:vp>\\t%<VDQV_L:Vwstype>0<VDQV_L:Vwsuf>, %1.<VDQV_L:Vtype>"
3511   [(set_attr "type" "neon_reduc_add<VDQV_L:q>")]
3512 )
3513
3514 (define_expand "@aarch64_<su>addlp<mode>"
3515   [(set (match_operand:<VDBLW> 0 "register_operand")
3516         (plus:<VDBLW>
3517           (vec_select:<VDBLW>
3518             (ANY_EXTEND:<V2XWIDE>
3519               (match_operand:VDQV_L 1 "register_operand"))
3520             (match_dup 2))
3521           (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3522             (match_dup 3))))]
3523  "TARGET_SIMD"
3524  {
3525    int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant () / 2;
3526    operands[2] = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
3527    operands[3] = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
3528  }
3529 )
3530
3531 (define_insn "*aarch64_<su>addlp<mode><vczle><vczbe>_insn"
3532   [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
3533         (plus:<VDBLW>
3534           (vec_select:<VDBLW>
3535             (ANY_EXTEND:<V2XWIDE>
3536               (match_operand:VDQV_L 1 "register_operand" "w"))
3537             (match_operand:<V2XWIDE> 2 "vect_par_cnst_even_or_odd_half"))
3538           (vec_select:<VDBLW> (ANY_EXTEND:<V2XWIDE> (match_dup 1))
3539             (match_operand:<V2XWIDE> 3 "vect_par_cnst_even_or_odd_half"))))]
3540  "TARGET_SIMD
3541   && !rtx_equal_p (operands[2], operands[3])"
3542  "<su>addlp\\t%0.<Vwhalf>, %1.<Vtype>"
3543   [(set_attr "type" "neon_reduc_add<q>")]
3544 )
3545
3546 (define_insn "clrsb<mode>2<vczle><vczbe>"
3547   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3548         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3549   "TARGET_SIMD"
3550   "cls\\t%0.<Vtype>, %1.<Vtype>"
3551   [(set_attr "type" "neon_cls<q>")]
3552 )
3553
3554 (define_insn "clz<mode>2<vczle><vczbe>"
3555  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3556        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
3557  "TARGET_SIMD"
3558  "clz\\t%0.<Vtype>, %1.<Vtype>"
3559   [(set_attr "type" "neon_cls<q>")]
3560 )
3561
3562 (define_insn "popcount<mode>2<vczle><vczbe>"
3563   [(set (match_operand:VB 0 "register_operand" "=w")
3564         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
3565   "TARGET_SIMD"
3566   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
3567   [(set_attr "type" "neon_cnt<q>")]
3568 )
3569
3570 (define_expand "popcount<mode>2"
3571   [(set (match_operand:VDQHSD_V1DI 0 "register_operand")
3572         (popcount:VDQHSD_V1DI
3573           (match_operand:VDQHSD_V1DI 1 "register_operand")))]
3574   "TARGET_SIMD"
3575   {
3576     if (TARGET_SVE)
3577       {
3578         rtx p = aarch64_ptrue_reg (<VPRED>mode, <bitsize> == 64 ? 8 : 16);
3579         emit_insn (gen_aarch64_pred_popcount<mode> (operands[0],
3580                                                     p,
3581                                                     operands[1]));
3582         DONE;
3583       }
3584
3585     if (<MODE>mode == V1DImode)
3586       {
3587         rtx out = gen_reg_rtx (DImode);
3588         emit_insn (gen_popcountdi2 (out, gen_lowpart (DImode, operands[1])));
3589         emit_move_insn (operands[0], gen_lowpart (<MODE>mode, out));
3590         DONE;
3591       }
3592
3593     /* Generate a byte popcount.  */
3594     machine_mode mode = <bitsize> == 64 ? V8QImode : V16QImode;
3595     machine_mode mode2 = <bitsize> == 64 ? V2SImode : V4SImode;
3596     rtx tmp = gen_reg_rtx (mode);
3597     auto icode = optab_handler (popcount_optab, mode);
3598     emit_insn (GEN_FCN (icode) (tmp, gen_lowpart (mode, operands[1])));
3599
3600     if (TARGET_DOTPROD
3601         && (<VEL>mode == SImode || <VEL>mode == DImode))
3602       {
3603         /* For V4SI and V2SI, we can generate a UDOT with a 0 accumulator and a
3604            1 multiplicand.  For V2DI, another UAADDLP is needed.  */
3605         rtx ones = force_reg (mode, CONST1_RTX (mode));
3606         auto icode = convert_optab_handler (udot_prod_optab, mode2, mode);
3607         mode = <bitsize> == 64 ? V2SImode : V4SImode;
3608         rtx dest = mode == <MODE>mode ? operands[0] : gen_reg_rtx (mode);
3609         rtx zeros = force_reg (mode, CONST0_RTX (mode));
3610         emit_insn (GEN_FCN (icode) (dest, tmp, ones, zeros));
3611         tmp = dest;
3612       }
3613
3614     /* Use a sequence of UADDLPs to accumulate the counts.  Each step doubles
3615        the element size and halves the number of elements.  */
3616     while (mode != <MODE>mode)
3617       {
3618         auto icode = code_for_aarch64_addlp (ZERO_EXTEND, GET_MODE (tmp));
3619         mode = insn_data[icode].operand[0].mode;
3620         rtx dest = mode == <MODE>mode ? operands[0] : gen_reg_rtx (mode);
3621         emit_insn (GEN_FCN (icode) (dest, tmp));
3622         tmp = dest;
3623       }
3624     DONE;
3625   }
3626 )
3627
3628 ;; 'across lanes' max and min ops.
3629
3630 ;; Template for outputting a scalar, so we can create __builtins which can be
3631 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
3632 (define_expand "reduc_<optab>_scal_<mode>"
3633   [(match_operand:<VEL> 0 "register_operand")
3634    (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3635                  FMAXMINV)]
3636   "TARGET_SIMD"
3637   {
3638     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3639     rtx scratch = gen_reg_rtx (<MODE>mode);
3640     emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3641                                                          operands[1]));
3642     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3643     DONE;
3644   }
3645 )
3646
3647 (define_expand "reduc_<fmaxmin>_scal_<mode>"
3648   [(match_operand:<VEL> 0 "register_operand")
3649    (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
3650                  FMAXMINNMV)]
3651   "TARGET_SIMD"
3652   {
3653     emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
3654     DONE;
3655   }
3656 )
3657
3658 ;; Likewise for integer cases, signed and unsigned.
3659 (define_expand "reduc_<optab>_scal_<mode>"
3660   [(match_operand:<VEL> 0 "register_operand")
3661    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
3662                     MAXMINV)]
3663   "TARGET_SIMD"
3664   {
3665     rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
3666     rtx scratch = gen_reg_rtx (<MODE>mode);
3667     emit_insn (gen_aarch64_reduc_<optab>_internal<mode> (scratch,
3668                                                          operands[1]));
3669     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
3670     DONE;
3671   }
3672 )
3673
3674 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3675  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
3676        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
3677                     MAXMINV))]
3678  "TARGET_SIMD"
3679  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
3680   [(set_attr "type" "neon_reduc_minmax<q>")]
3681 )
3682
3683 (define_insn "aarch64_reduc_<optab>_internalv2si"
3684  [(set (match_operand:V2SI 0 "register_operand" "=w")
3685        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
3686                     MAXMINV))]
3687  "TARGET_SIMD"
3688  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
3689   [(set_attr "type" "neon_reduc_minmax")]
3690 )
3691
3692 (define_insn "aarch64_reduc_<optab>_internal<mode>"
3693  [(set (match_operand:VHSDF 0 "register_operand" "=w")
3694        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
3695                       FMAXMINV))]
3696  "TARGET_SIMD"
3697  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
3698   [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
3699 )
3700
3701 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
3702 ;; allocation.
3703 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
3704 ;; to select.
3705 ;;
3706 ;; Thus our BSL is of the form:
3707 ;;   op0 = bsl (mask, op2, op3)
3708 ;; We can use any of:
3709 ;;
3710 ;;   if (op0 = mask)
3711 ;;     bsl mask, op1, op2
3712 ;;   if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
3713 ;;     bit op0, op2, mask
3714 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
3715 ;;     bif op0, op1, mask
3716 ;;
3717 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
3718 ;; Some forms of straight-line code may generate the equivalent form
3719 ;; in *aarch64_simd_bsl<mode>_alt.
3720
3721 (define_insn "aarch64_simd_bsl<mode>_internal<vczle><vczbe>"
3722   [(set (match_operand:VDQ_I 0 "register_operand")
3723         (xor:VDQ_I
3724            (and:VDQ_I
3725              (xor:VDQ_I
3726                (match_operand:<V_INT_EQUIV> 3 "register_operand")
3727                (match_operand:VDQ_I 2 "register_operand"))
3728              (match_operand:VDQ_I 1 "register_operand"))
3729           (match_dup:<V_INT_EQUIV> 3)
3730         ))]
3731   "TARGET_SIMD"
3732   {@ [ cons: =0 , 1 , 2 , 3  ]
3733      [ w        , 0 , w , w  ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
3734      [ w        , w , w , 0  ] bit\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3735      [ w        , w , 0 , w  ] bif\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3736   }
3737   [(set_attr "type" "neon_bsl<q>")]
3738 )
3739
3740 ;; We need this form in addition to the above pattern to match the case
3741 ;; when combine tries merging three insns such that the second operand of
3742 ;; the outer XOR matches the second operand of the inner XOR rather than
3743 ;; the first.  The two are equivalent but since recog doesn't try all
3744 ;; permutations of commutative operations, we have to have a separate pattern.
3745
3746 (define_insn "*aarch64_simd_bsl<mode>_alt<vczle><vczbe>"
3747   [(set (match_operand:VDQ_I 0 "register_operand")
3748         (xor:VDQ_I
3749            (and:VDQ_I
3750              (xor:VDQ_I
3751                (match_operand:VDQ_I 3 "register_operand")
3752                (match_operand:<V_INT_EQUIV> 2 "register_operand"))
3753               (match_operand:VDQ_I 1 "register_operand"))
3754           (match_dup:<V_INT_EQUIV> 2)))]
3755   "TARGET_SIMD"
3756   {@ [ cons: =0 , 1 , 2 , 3  ]
3757      [ w        , 0 , w , w  ] bsl\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
3758      [ w        , w , 0 , w  ] bit\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
3759      [ w        , w , w , 0  ] bif\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
3760   }
3761   [(set_attr "type" "neon_bsl<q>")]
3762 )
3763
3764 ;; DImode is special, we want to avoid computing operations which are
3765 ;; more naturally computed in general purpose registers in the vector
3766 ;; registers.  If we do that, we need to move all three operands from general
3767 ;; purpose registers to vector registers, then back again.  However, we
3768 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
3769 ;; optimizations based on the component operations of a BSL.
3770 ;;
3771 ;; That means we need a splitter back to the individual operations, if they
3772 ;; would be better calculated on the integer side.
3773
3774 (define_insn_and_split "aarch64_simd_bsldi_internal"
3775   [(set (match_operand:DI 0 "register_operand")
3776         (xor:DI
3777            (and:DI
3778              (xor:DI
3779                (match_operand:DI 3 "register_operand")
3780                (match_operand:DI 2 "register_operand"))
3781              (match_operand:DI 1 "register_operand"))
3782           (match_dup:DI 3)
3783         ))]
3784   "TARGET_SIMD"
3785   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3786      [ w        , 0 , w , w ; neon_bsl    , 4      ] bsl\t%0.8b, %2.8b, %3.8b
3787      [ w        , w , w , 0 ; neon_bsl    , 4      ] bit\t%0.8b, %2.8b, %1.8b
3788      [ w        , w , 0 , w ; neon_bsl    , 4      ] bif\t%0.8b, %3.8b, %1.8b
3789      [ &r       , r , r , r ; multiple    , 12     ] #
3790   }
3791   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3792   [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
3793 {
3794   /* Split back to individual operations.  If we're before reload, and
3795      able to create a temporary register, do so.  If we're after reload,
3796      we've got an early-clobber destination register, so use that.
3797      Otherwise, we can't create pseudos and we can't yet guarantee that
3798      operands[0] is safe to write, so FAIL to split.  */
3799
3800   rtx scratch;
3801   if (reload_completed)
3802     scratch = operands[0];
3803   else if (can_create_pseudo_p ())
3804     scratch = gen_reg_rtx (DImode);
3805   else
3806     FAIL;
3807
3808   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3809   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3810   emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
3811   DONE;
3812 }
3813 )
3814
3815 (define_insn_and_split "aarch64_simd_bsldi_alt"
3816   [(set (match_operand:DI 0 "register_operand")
3817         (xor:DI
3818            (and:DI
3819              (xor:DI
3820                (match_operand:DI 3 "register_operand")
3821                (match_operand:DI 2 "register_operand"))
3822              (match_operand:DI 1 "register_operand"))
3823           (match_dup:DI 2)
3824         ))]
3825   "TARGET_SIMD"
3826   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: type , length ]
3827      [ w        , 0 , w , w ; neon_bsl    , 4      ] bsl\t%0.8b, %3.8b, %2.8b
3828      [ w        , w , 0 , w ; neon_bsl    , 4      ] bit\t%0.8b, %3.8b, %1.8b
3829      [ w        , w , w , 0 ; neon_bsl    , 4      ] bif\t%0.8b, %2.8b, %1.8b
3830      [ &r       , r , r , r ; multiple    , 12     ] #
3831   }
3832   "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
3833   [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
3834 {
3835   /* Split back to individual operations.  If we're before reload, and
3836      able to create a temporary register, do so.  If we're after reload,
3837      we've got an early-clobber destination register, so use that.
3838      Otherwise, we can't create pseudos and we can't yet guarantee that
3839      operands[0] is safe to write, so FAIL to split.  */
3840
3841   rtx scratch;
3842   if (reload_completed)
3843     scratch = operands[0];
3844   else if (can_create_pseudo_p ())
3845     scratch = gen_reg_rtx (DImode);
3846   else
3847     FAIL;
3848
3849   emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
3850   emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
3851   emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
3852   DONE;
3853 }
3854 )
3855
3856 (define_expand "@aarch64_simd_bsl<mode>"
3857   [(match_operand:VALLDIF 0 "register_operand")
3858    (match_operand:<V_INT_EQUIV> 1 "register_operand")
3859    (match_operand:VALLDIF 2 "register_operand")
3860    (match_operand:VALLDIF 3 "register_operand")]
3861  "TARGET_SIMD"
3862 {
3863   /* We can't alias operands together if they have different modes.  */
3864   rtx tmp = operands[0];
3865   if (FLOAT_MODE_P (<MODE>mode))
3866     {
3867       operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
3868       operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
3869       tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
3870     }
3871   operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
3872   emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
3873                                                          operands[1],
3874                                                          operands[2],
3875                                                          operands[3]));
3876   if (tmp != operands[0])
3877     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
3878
3879   DONE;
3880 })
3881
3882 (define_expand "vcond_mask_<mode><v_int_equiv>"
3883   [(match_operand:VALLDI 0 "register_operand")
3884    (match_operand:VALLDI 1 "nonmemory_operand")
3885    (match_operand:VALLDI 2 "nonmemory_operand")
3886    (match_operand:<V_INT_EQUIV> 3 "register_operand")]
3887   "TARGET_SIMD"
3888 {
3889   /* If we have (a = (P) ? -1 : 0);
3890      Then we can simply move the generated mask (result must be int).  */
3891   if (operands[1] == CONSTM1_RTX (<MODE>mode)
3892       && operands[2] == CONST0_RTX (<MODE>mode))
3893     emit_move_insn (operands[0], operands[3]);
3894   /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
3895   else if (operands[1] == CONST0_RTX (<MODE>mode)
3896            && operands[2] == CONSTM1_RTX (<MODE>mode))
3897     emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
3898   else
3899     {
3900       if (!REG_P (operands[1]))
3901         operands[1] = force_reg (<MODE>mode, operands[1]);
3902       if (!REG_P (operands[2]))
3903         operands[2] = force_reg (<MODE>mode, operands[2]);
3904       emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
3905                                              operands[1], operands[2]));
3906     }
3907
3908   DONE;
3909 })
3910
3911 ;; Patterns comparing two vectors and conditionally jump
3912
3913 (define_expand "cbranch<mode>4"
3914   [(set (pc)
3915         (if_then_else
3916           (match_operator 0 "aarch64_equality_operator"
3917             [(match_operand:VDQ_I 1 "register_operand")
3918              (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")])
3919           (label_ref (match_operand 3 ""))
3920           (pc)))]
3921   "TARGET_SIMD"
3922 {
3923   auto code = GET_CODE (operands[0]);
3924   rtx tmp = operands[1];
3925
3926   /* If comparing against a non-zero vector we have to do a comparison first
3927      so we can have a != 0 comparison with the result.  */
3928   if (operands[2] != CONST0_RTX (<MODE>mode))
3929     {
3930       tmp = gen_reg_rtx (<MODE>mode);
3931       emit_insn (gen_xor<mode>3 (tmp, operands[1], operands[2]));
3932     }
3933
3934   /* For 64-bit vectors we need no reductions.  */
3935   if (known_eq (128, GET_MODE_BITSIZE (<MODE>mode)))
3936     {
3937       /* Always reduce using a V4SI.  */
3938       rtx reduc = gen_lowpart (V4SImode, tmp);
3939       rtx res = gen_reg_rtx (V4SImode);
3940       emit_insn (gen_aarch64_umaxpv4si (res, reduc, reduc));
3941       emit_move_insn (tmp, gen_lowpart (<MODE>mode, res));
3942     }
3943
3944   rtx val = gen_reg_rtx (DImode);
3945   emit_move_insn (val, gen_lowpart (DImode, tmp));
3946
3947   rtx cc_reg = aarch64_gen_compare_reg (code, val, const0_rtx);
3948   rtx cmp_rtx = gen_rtx_fmt_ee (code, DImode, cc_reg, const0_rtx);
3949   emit_jump_insn (gen_condjump (cmp_rtx, cc_reg, operands[3]));
3950   DONE;
3951 })
3952
3953 ;; Patterns comparing two vectors to produce a mask.
3954
3955 (define_expand "vec_cmp<mode><mode>"
3956   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3957           (match_operator 1 "comparison_operator"
3958             [(match_operand:VSDQ_I_DI 2 "register_operand")
3959              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
3960   "TARGET_SIMD"
3961 {
3962   rtx mask = operands[0];
3963   enum rtx_code code = GET_CODE (operands[1]);
3964
3965   switch (code)
3966     {
3967     case NE:
3968     case LE:
3969     case LT:
3970     case GE:
3971     case GT:
3972     case EQ:
3973       if (operands[3] == CONST0_RTX (<MODE>mode))
3974         break;
3975
3976       /* Fall through.  */
3977     default:
3978       if (!REG_P (operands[3]))
3979         operands[3] = force_reg (<MODE>mode, operands[3]);
3980
3981       break;
3982     }
3983
3984   switch (code)
3985     {
3986     case LT:
3987       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
3988       break;
3989
3990     case GE:
3991       emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
3992       break;
3993
3994     case LE:
3995       emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
3996       break;
3997
3998     case GT:
3999       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
4000       break;
4001
4002     case LTU:
4003       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
4004       break;
4005
4006     case GEU:
4007       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
4008       break;
4009
4010     case LEU:
4011       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
4012       break;
4013
4014     case GTU:
4015       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
4016       break;
4017
4018     case NE:
4019       /* Handle NE as !EQ.  */
4020       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
4021       emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
4022       break;
4023
4024     case EQ:
4025       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
4026       break;
4027
4028     default:
4029       gcc_unreachable ();
4030     }
4031
4032   DONE;
4033 })
4034
4035 (define_expand "vec_cmp<mode><v_int_equiv>"
4036   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
4037         (match_operator 1 "comparison_operator"
4038             [(match_operand:VDQF 2 "register_operand")
4039              (match_operand:VDQF 3 "nonmemory_operand")]))]
4040   "TARGET_SIMD"
4041 {
4042   int use_zero_form = 0;
4043   enum rtx_code code = GET_CODE (operands[1]);
4044   rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
4045
4046   rtx (*comparison) (rtx, rtx, rtx) = NULL;
4047
4048   switch (code)
4049     {
4050     case LE:
4051     case LT:
4052     case GE:
4053     case GT:
4054     case EQ:
4055       if (operands[3] == CONST0_RTX (<MODE>mode))
4056         {
4057           use_zero_form = 1;
4058           break;
4059         }
4060       /* Fall through.  */
4061     default:
4062       if (!REG_P (operands[3]))
4063         operands[3] = force_reg (<MODE>mode, operands[3]);
4064
4065       break;
4066     }
4067
4068   switch (code)
4069     {
4070     case LT:
4071       if (use_zero_form)
4072         {
4073           comparison = gen_aarch64_cmlt<mode>;
4074           break;
4075         }
4076       /* Fall through.  */
4077     case UNLT:
4078       std::swap (operands[2], operands[3]);
4079       /* Fall through.  */
4080     case UNGT:
4081     case GT:
4082       comparison = gen_aarch64_cmgt<mode>;
4083       break;
4084     case LE:
4085       if (use_zero_form)
4086         {
4087           comparison = gen_aarch64_cmle<mode>;
4088           break;
4089         }
4090       /* Fall through.  */
4091     case UNLE:
4092       std::swap (operands[2], operands[3]);
4093       /* Fall through.  */
4094     case UNGE:
4095     case GE:
4096       comparison = gen_aarch64_cmge<mode>;
4097       break;
4098     case NE:
4099     case EQ:
4100       comparison = gen_aarch64_cmeq<mode>;
4101       break;
4102     case UNEQ:
4103     case ORDERED:
4104     case UNORDERED:
4105     case LTGT:
4106       break;
4107     default:
4108       gcc_unreachable ();
4109     }
4110
4111   switch (code)
4112     {
4113     case UNGE:
4114     case UNGT:
4115     case UNLE:
4116     case UNLT:
4117       {
4118         /* All of the above must not raise any FP exceptions.  Thus we first
4119            check each operand for NaNs and force any elements containing NaN to
4120            zero before using them in the compare.
4121            Example: UN<cc> (a, b) -> UNORDERED (a, b) |
4122                                      (cm<cc> (isnan (a) ? 0.0 : a,
4123                                               isnan (b) ? 0.0 : b))
4124            We use the following transformations for doing the comparisions:
4125            a UNGE b -> a GE b
4126            a UNGT b -> a GT b
4127            a UNLE b -> b GE a
4128            a UNLT b -> b GT a.  */
4129
4130         rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
4131         rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
4132         rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
4133         emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
4134         emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
4135         emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
4136         emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
4137                                           lowpart_subreg (<V_INT_EQUIV>mode,
4138                                                           operands[2],
4139                                                           <MODE>mode)));
4140         emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
4141                                           lowpart_subreg (<V_INT_EQUIV>mode,
4142                                                           operands[3],
4143                                                           <MODE>mode)));
4144         gcc_assert (comparison != NULL);
4145         emit_insn (comparison (operands[0],
4146                                lowpart_subreg (<MODE>mode,
4147                                                tmp0, <V_INT_EQUIV>mode),
4148                                lowpart_subreg (<MODE>mode,
4149                                                tmp1, <V_INT_EQUIV>mode)));
4150         emit_insn (gen_iorn<v_int_equiv>3 (operands[0], operands[0], tmp2));
4151       }
4152       break;
4153
4154     case LT:
4155     case LE:
4156     case GT:
4157     case GE:
4158     case EQ:
4159     case NE:
4160       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
4161          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
4162          a GE b -> a GE b
4163          a GT b -> a GT b
4164          a LE b -> b GE a
4165          a LT b -> b GT a
4166          a EQ b -> a EQ b
4167          a NE b -> ~(a EQ b)  */
4168       gcc_assert (comparison != NULL);
4169       emit_insn (comparison (operands[0], operands[2], operands[3]));
4170       if (code == NE)
4171         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4172       break;
4173
4174     case LTGT:
4175       /* LTGT is not guranteed to not generate a FP exception.  So let's
4176          go the faster way : ((a > b) || (b > a)).  */
4177       emit_insn (gen_aarch64_cmgt<mode> (operands[0],
4178                                          operands[2], operands[3]));
4179       emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
4180       emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
4181       break;
4182
4183     case ORDERED:
4184     case UNORDERED:
4185     case UNEQ:
4186       /* cmeq (a, a) & cmeq (b, b).  */
4187       emit_insn (gen_aarch64_cmeq<mode> (operands[0],
4188                                          operands[2], operands[2]));
4189       emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
4190       emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
4191
4192       if (code == UNORDERED)
4193         emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
4194       else if (code == UNEQ)
4195         {
4196           emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
4197           emit_insn (gen_iorn<v_int_equiv>3 (operands[0], tmp, operands[0]));
4198         }
4199       break;
4200
4201     default:
4202       gcc_unreachable ();
4203     }
4204
4205   DONE;
4206 })
4207
4208 (define_expand "vec_cmpu<mode><mode>"
4209   [(set (match_operand:VSDQ_I_DI 0 "register_operand")
4210           (match_operator 1 "comparison_operator"
4211             [(match_operand:VSDQ_I_DI 2 "register_operand")
4212              (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
4213   "TARGET_SIMD"
4214 {
4215   emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
4216                                       operands[2], operands[3]));
4217   DONE;
4218 })
4219
4220 ;; Patterns for AArch64 SIMD Intrinsics.
4221
4222 ;; Lane extraction with sign extension to general purpose register.
4223 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
4224   [(set (match_operand:GPI 0 "register_operand" "=r")
4225         (sign_extend:GPI
4226           (vec_select:<VDQQH:VEL>
4227             (match_operand:VDQQH 1 "register_operand" "w")
4228             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4229   "TARGET_SIMD"
4230   {
4231     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4232                                            INTVAL (operands[2]));
4233     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
4234   }
4235   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4236 )
4237
4238 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
4239   [(set (match_operand:GPI 0 "register_operand" "=r")
4240         (zero_extend:GPI
4241           (vec_select:<VDQQH:VEL>
4242             (match_operand:VDQQH 1 "register_operand" "w")
4243             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4244   "TARGET_SIMD"
4245   {
4246     operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
4247                                            INTVAL (operands[2]));
4248     return "umov\\t%w0, %1.<VDQQH:Vetype>[%2]";
4249   }
4250   [(set_attr "type" "neon_to_gp<VDQQH:q>")]
4251 )
4252
4253 ;; Lane extraction of a value, neither sign nor zero extension
4254 ;; is guaranteed so upper bits should be considered undefined.
4255 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
4256 ;; Extracting lane zero is split into a simple move when it is between SIMD
4257 ;; registers or a store.
4258 (define_insn_and_split "@aarch64_get_lane<mode>"
4259   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
4260         (vec_select:<VEL>
4261           (match_operand:VALL_F16 1 "register_operand" "w, w, w")
4262           (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
4263   "TARGET_SIMD"
4264   {
4265     operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4266     switch (which_alternative)
4267       {
4268         case 0:
4269           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
4270         case 1:
4271           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
4272         case 2:
4273           return "st1\\t{%1.<Vetype>}[%2], %0";
4274         default:
4275           gcc_unreachable ();
4276       }
4277   }
4278  "&& reload_completed
4279   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
4280  [(set (match_dup 0) (match_dup 1))]
4281  {
4282    operands[1] = aarch64_replace_reg_mode (operands[1], <VEL>mode);
4283  }
4284   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
4285 )
4286
4287 (define_insn "*aarch64_get_high<mode>"
4288   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
4289         (vec_select:<VEL>
4290           (match_operand:VQ_2E 1 "register_operand" "w")
4291           (parallel [(match_operand:SI 2 "immediate_operand")])))]
4292   "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
4293   "fmov\t%0, %1.d[1]"
4294   [(set_attr "type" "f_mrc")]
4295 )
4296
4297 (define_insn "load_pair_lanes<mode>"
4298   [(set (match_operand:<VDBL> 0 "register_operand" "=w")
4299         (vec_concat:<VDBL>
4300            (match_operand:VDCSIF 1 "memory_operand" "Utq")
4301            (match_operand:VDCSIF 2 "memory_operand" "m")))]
4302   "TARGET_FLOAT
4303    && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
4304   "ldr\\t%<single_dtype>0, %1"
4305   [(set_attr "type" "neon_load1_1reg<dblq>")]
4306 )
4307
4308 ;; This STP pattern is a partial duplicate of the general vec_concat patterns
4309 ;; below.  The reason for having both of them is that the alternatives of
4310 ;; the later patterns do not have consistent register preferences: the STP
4311 ;; alternatives have no preference between GPRs and FPRs (and if anything,
4312 ;; the GPR form is more natural for scalar integers) whereas the other
4313 ;; alternatives *require* an FPR for operand 1 and prefer one for operand 2.
4314 ;;
4315 ;; Using "*" to hide the STP alternatives from the RA penalizes cases in
4316 ;; which the destination was always memory.  On the other hand, expressing
4317 ;; the true preferences makes GPRs seem more palatable than they really are
4318 ;; for register destinations.
4319 ;;
4320 ;; Despite that, we do still want the general form to have STP alternatives,
4321 ;; in order to handle cases where a register destination is spilled.
4322 ;;
4323 ;; The best compromise therefore seemed to be to have a dedicated STP
4324 ;; pattern to catch cases in which the destination was always memory.
4325 ;; This dedicated pattern must come first.
4326
4327 (define_insn "store_pair_lanes<mode>"
4328   [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand")
4329         (vec_concat:<VDBL>
4330            (match_operand:VDCSIF 1 "register_operand")
4331            (match_operand:VDCSIF 2 "register_operand")))]
4332   "TARGET_FLOAT"
4333   {@ [ cons: =0 , 1 , 2 ; attrs: type ]
4334      [ Umn      , w , w ; neon_stp    ] stp\t%<single_type>1, %<single_type>2, %y0
4335      [ Umn      , r , r ; store_16    ] stp\t%<single_wx>1, %<single_wx>2, %y0
4336   }
4337 )
4338
4339 ;; Form a vector whose least significant half comes from operand 1 and whose
4340 ;; most significant half comes from operand 2.  The register alternatives
4341 ;; tie the least significant half to the same register as the destination,
4342 ;; so that only the other half needs to be handled explicitly.  For the
4343 ;; reasons given above, the STP alternatives use ? for constraints that
4344 ;; the register alternatives either don't accept or themselves disparage.
4345
4346 (define_insn "*aarch64_combine_internal<mode>"
4347   [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4348         (vec_concat:<VDBL>
4349           (match_operand:VDCSIF 1 "register_operand")
4350           (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")))]
4351   "TARGET_FLOAT
4352    && !BYTES_BIG_ENDIAN
4353    && (register_operand (operands[0], <VDBL>mode)
4354        || register_operand (operands[2], <MODE>mode))"
4355   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
4356      [ w        , w  , w   ; neon_permute<dblq>        , simd  ] uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
4357      [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
4358      [ w        , 0  , ?r  ; f_mcr                     , *     ] fmov\t%0.d[1], %2
4359      [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] ld1\t{%0.<single_type>}[1], %2
4360      [ Umn      , ?w , w   ; neon_stp                  , *     ] stp\t%<single_type>1, %<single_type>2, %y0
4361      [ Umn      , ?r , ?r  ; store_16                  , *     ] stp\t%<single_wx>1, %<single_wx>2, %y0
4362   }
4363 )
4364
4365 (define_insn "*aarch64_combine_internal_be<mode>"
4366   [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand")
4367         (vec_concat:<VDBL>
4368           (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand")
4369           (match_operand:VDCSIF 1 "register_operand")))]
4370   "TARGET_FLOAT
4371    && BYTES_BIG_ENDIAN
4372    && (register_operand (operands[0], <VDBL>mode)
4373        || register_operand (operands[2], <MODE>mode))"
4374   {@ [ cons: =0 , 1  , 2   ; attrs: type               , arch  ]
4375      [ w        , w  , w   ; neon_permute<dblq>        , simd  ] uzp1\t%0.2<single_type>, %1.2<single_type>, %2.2<single_type>
4376      [ w        , 0  , ?r  ; neon_from_gp<dblq>        , simd  ] ins\t%0.<single_type>[1], %<single_wx>2
4377      [ w        , 0  , ?r  ; f_mcr                     , *     ] fmov\t%0.d[1], %2
4378      [ w        , 0  , Utv ; neon_load1_one_lane<dblq> , simd  ] ld1\t{%0.<single_type>}[1], %2
4379      [ Umn      , ?w , ?w  ; neon_stp                  , *     ] stp\t%<single_type>2, %<single_type>1, %y0
4380      [ Umn      , ?r , ?r  ; store_16                  , *     ] stp\t%<single_wx>2, %<single_wx>1, %y0
4381   }
4382 )
4383
4384 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4385 ;; dest vector.
4386
4387 (define_insn "*aarch64_combinez<mode>"
4388   [(set (match_operand:<VDBL> 0 "register_operand")
4389         (vec_concat:<VDBL>
4390           (match_operand:VDCSIF 1 "nonimmediate_operand")
4391           (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
4392   "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
4393   {@ [ cons: =0 , 1  ; attrs: type      ]
4394      [ w        , w  ; neon_move<q>     ] fmov\t%<single_type>0, %<single_type>1
4395      [ w        , ?r ; neon_from_gp     ] fmov\t%<single_type>0, %<single_wx>1
4396      [ w        , m  ; neon_load1_1reg  ] ldr\t%<single_type>0, %1
4397   }
4398 )
4399
4400 (define_insn "*aarch64_combinez_be<mode>"
4401   [(set (match_operand:<VDBL> 0 "register_operand")
4402         (vec_concat:<VDBL>
4403           (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
4404           (match_operand:VDCSIF 1 "nonimmediate_operand")))]
4405   "TARGET_FLOAT && BYTES_BIG_ENDIAN"
4406   {@ [ cons: =0 , 1  ; attrs: type      ]
4407      [ w        , w  ; neon_move<q>     ] fmov\t%<single_type>0, %<single_type>1
4408      [ w        , ?r ; neon_from_gp     ] fmov\t%<single_type>0, %<single_wx>1
4409      [ w        , m  ; neon_load1_1reg  ] ldr\t%<single_type>0, %1
4410   }
4411 )
4412
4413 ;; Form a vector whose first half (in array order) comes from operand 1
4414 ;; and whose second half (in array order) comes from operand 2.
4415 ;; This operand order follows the RTL vec_concat operation.
4416 (define_expand "@aarch64_vec_concat<mode>"
4417   [(set (match_operand:<VDBL> 0 "register_operand")
4418         (vec_concat:<VDBL>
4419           (match_operand:VDCSIF 1 "general_operand")
4420           (match_operand:VDCSIF 2 "general_operand")))]
4421   "TARGET_FLOAT"
4422 {
4423   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
4424   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
4425
4426   if (MEM_P (operands[1])
4427       && MEM_P (operands[2])
4428       && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2]))
4429     /* Use load_pair_lanes<mode>.  */
4430     ;
4431   else if (operands[hi] == CONST0_RTX (<MODE>mode))
4432     {
4433       /* Use *aarch64_combinez<mode>.  */
4434       if (!nonimmediate_operand (operands[lo], <MODE>mode))
4435         operands[lo] = force_reg (<MODE>mode, operands[lo]);
4436     }
4437   else
4438     {
4439       /* Use *aarch64_combine_internal<mode>.  */
4440       operands[lo] = force_reg (<MODE>mode, operands[lo]);
4441       if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
4442         {
4443           if (MEM_P (operands[hi]))
4444             {
4445               rtx addr = force_reg (Pmode, XEXP (operands[hi], 0));
4446               operands[hi] = replace_equiv_address (operands[hi], addr);
4447             }
4448           else
4449             operands[hi] = force_reg (<MODE>mode, operands[hi]);
4450         }
4451     }
4452 })
4453
4454 ;; Form a vector whose least significant half comes from operand 1 and whose
4455 ;; most significant half comes from operand 2.  This operand order follows
4456 ;; arm_neon.h vcombine* intrinsics.
4457 (define_expand "@aarch64_combine<mode>"
4458   [(match_operand:<VDBL> 0 "register_operand")
4459    (match_operand:VDC 1 "general_operand")
4460    (match_operand:VDC 2 "general_operand")]
4461   "TARGET_FLOAT"
4462 {
4463   if (BYTES_BIG_ENDIAN)
4464     std::swap (operands[1], operands[2]);
4465   emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1],
4466                                            operands[2]));
4467   DONE;
4468 }
4469 )
4470
4471 ;; <su><addsub>l<q>.
4472
4473 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
4474  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4475        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4476                            (match_operand:VQW 1 "register_operand" "w")
4477                            (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4478                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4479                            (match_operand:VQW 2 "register_operand" "w")
4480                            (match_dup 3)))))]
4481   "TARGET_SIMD"
4482   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4483   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4484 )
4485
4486 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
4487  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4488        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4489                            (match_operand:VQW 1 "register_operand" "w")
4490                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4491                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
4492                            (match_operand:VQW 2 "register_operand" "w")
4493                            (match_dup 3)))))]
4494   "TARGET_SIMD"
4495   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
4496   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4497 )
4498
4499 (define_expand "vec_widen_<su>add_lo_<mode>"
4500   [(match_operand:<VWIDE> 0 "register_operand")
4501    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4502    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4503   "TARGET_SIMD"
4504 {
4505   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4506   emit_insn (gen_aarch64_<su>addl<mode>_lo_internal (operands[0], operands[1],
4507                                                      operands[2], p));
4508   DONE;
4509 })
4510
4511 (define_expand "vec_widen_<su>add_hi_<mode>"
4512   [(match_operand:<VWIDE> 0 "register_operand")
4513    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4514    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4515   "TARGET_SIMD"
4516 {
4517   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4518   emit_insn (gen_aarch64_<su>addl<mode>_hi_internal (operands[0], operands[1],
4519                                                      operands[2], p));
4520   DONE;
4521 })
4522
4523 (define_expand "vec_widen_<su>sub_lo_<mode>"
4524   [(match_operand:<VWIDE> 0 "register_operand")
4525    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4526    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4527   "TARGET_SIMD"
4528 {
4529   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4530   emit_insn (gen_aarch64_<su>subl<mode>_lo_internal (operands[0], operands[1],
4531                                                      operands[2], p));
4532   DONE;
4533 })
4534
4535 (define_expand "vec_widen_<su>sub_hi_<mode>"
4536   [(match_operand:<VWIDE> 0 "register_operand")
4537    (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))
4538    (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand"))]
4539   "TARGET_SIMD"
4540 {
4541   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4542   emit_insn (gen_aarch64_<su>subl<mode>_hi_internal (operands[0], operands[1],
4543                                                      operands[2], p));
4544   DONE;
4545 })
4546
4547 (define_expand "aarch64_saddl2<mode>"
4548   [(match_operand:<VWIDE> 0 "register_operand")
4549    (match_operand:VQW 1 "register_operand")
4550    (match_operand:VQW 2 "register_operand")]
4551   "TARGET_SIMD"
4552 {
4553   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4554   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
4555                                                   operands[2], p));
4556   DONE;
4557 })
4558
4559 (define_expand "aarch64_uaddl2<mode>"
4560   [(match_operand:<VWIDE> 0 "register_operand")
4561    (match_operand:VQW 1 "register_operand")
4562    (match_operand:VQW 2 "register_operand")]
4563   "TARGET_SIMD"
4564 {
4565   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4566   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
4567                                                   operands[2], p));
4568   DONE;
4569 })
4570
4571 (define_expand "aarch64_ssubl2<mode>"
4572   [(match_operand:<VWIDE> 0 "register_operand")
4573    (match_operand:VQW 1 "register_operand")
4574    (match_operand:VQW 2 "register_operand")]
4575   "TARGET_SIMD"
4576 {
4577   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4578   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
4579                                                 operands[2], p));
4580   DONE;
4581 })
4582
4583 (define_expand "aarch64_usubl2<mode>"
4584   [(match_operand:<VWIDE> 0 "register_operand")
4585    (match_operand:VQW 1 "register_operand")
4586    (match_operand:VQW 2 "register_operand")]
4587   "TARGET_SIMD"
4588 {
4589   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4590   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
4591                                                 operands[2], p));
4592   DONE;
4593 })
4594
4595 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
4596  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4597        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
4598                            (match_operand:VD_BHSI 1 "register_operand" "w"))
4599                        (ANY_EXTEND:<VWIDE>
4600                            (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4601   "TARGET_SIMD"
4602   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
4603   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
4604 )
4605
4606 ;; <su><addsub>w<q>.
4607
4608 (define_expand "widen_ssum<mode>3"
4609   [(set (match_operand:<VDBLW> 0 "register_operand")
4610         (plus:<VDBLW> (sign_extend:<VDBLW>
4611                         (match_operand:VQW 1 "register_operand"))
4612                       (match_operand:<VDBLW> 2 "register_operand")))]
4613   "TARGET_SIMD"
4614   {
4615     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4616     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4617
4618     emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
4619                                                 operands[1], p));
4620     emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
4621     DONE;
4622   }
4623 )
4624
4625 (define_expand "widen_ssum<mode>3"
4626   [(set (match_operand:<VWIDE> 0 "register_operand")
4627         (plus:<VWIDE> (sign_extend:<VWIDE>
4628                         (match_operand:VD_BHSI 1 "register_operand"))
4629                       (match_operand:<VWIDE> 2 "register_operand")))]
4630   "TARGET_SIMD"
4631 {
4632   emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
4633   DONE;
4634 })
4635
4636 (define_expand "widen_usum<mode>3"
4637   [(set (match_operand:<VDBLW> 0 "register_operand")
4638         (plus:<VDBLW> (zero_extend:<VDBLW>
4639                         (match_operand:VQW 1 "register_operand"))
4640                       (match_operand:<VDBLW> 2 "register_operand")))]
4641   "TARGET_SIMD"
4642   {
4643     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
4644     rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
4645
4646     emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
4647                                                  operands[1], p));
4648     emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
4649     DONE;
4650   }
4651 )
4652
4653 (define_expand "widen_usum<mode>3"
4654   [(set (match_operand:<VWIDE> 0 "register_operand")
4655         (plus:<VWIDE> (zero_extend:<VWIDE>
4656                         (match_operand:VD_BHSI 1 "register_operand"))
4657                       (match_operand:<VWIDE> 2 "register_operand")))]
4658   "TARGET_SIMD"
4659 {
4660   emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
4661   DONE;
4662 })
4663
4664 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
4665   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4666         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4667           (ANY_EXTEND:<VWIDE>
4668             (match_operand:VD_BHSI 2 "register_operand" "w"))))]
4669   "TARGET_SIMD"
4670   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4671   [(set_attr "type" "neon_sub_widen")]
4672 )
4673
4674 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
4675   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4676         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4677           (ANY_EXTEND:<VWIDE>
4678             (vec_select:<VHALF>
4679               (match_operand:VQW 2 "register_operand" "w")
4680               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
4681   "TARGET_SIMD"
4682   "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4683   [(set_attr "type" "neon_sub_widen")]
4684 )
4685
4686 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
4687   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4688         (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
4689           (ANY_EXTEND:<VWIDE>
4690             (vec_select:<VHALF>
4691               (match_operand:VQW 2 "register_operand" "w")
4692               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
4693   "TARGET_SIMD"
4694   "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4695   [(set_attr "type" "neon_sub_widen")]
4696 )
4697
4698 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
4699   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4700         (plus:<VWIDE>
4701           (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
4702           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4703   "TARGET_SIMD"
4704   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4705   [(set_attr "type" "neon_add_widen")]
4706 )
4707
4708 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
4709   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4710         (plus:<VWIDE>
4711           (ANY_EXTEND:<VWIDE>
4712             (vec_select:<VHALF>
4713               (match_operand:VQW 2 "register_operand" "w")
4714               (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
4715           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4716   "TARGET_SIMD"
4717   "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
4718   [(set_attr "type" "neon_add_widen")]
4719 )
4720
4721 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
4722   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4723         (plus:<VWIDE>
4724           (ANY_EXTEND:<VWIDE>
4725             (vec_select:<VHALF>
4726               (match_operand:VQW 2 "register_operand" "w")
4727               (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
4728           (match_operand:<VWIDE> 1 "register_operand" "w")))]
4729   "TARGET_SIMD"
4730   "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
4731   [(set_attr "type" "neon_add_widen")]
4732 )
4733
4734 (define_expand "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>"
4735   [(set (match_operand:<VWIDE> 0 "register_operand")
4736         (ADDSUB:<VWIDE>
4737           (ANY_EXTEND:<VWIDE>
4738             (vec_select:<VHALF>
4739               (match_operand:VQW 2 "register_operand")
4740               (match_dup 3)))
4741           (match_operand:<VWIDE> 1 "register_operand")))]
4742   "TARGET_SIMD"
4743 {
4744   /* We still do an emit_insn rather than relying on the pattern above
4745      because for the MINUS case the operands would need to be swapped
4746      around.  */
4747   operands[3]
4748     = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4749   emit_insn (gen_aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal(
4750                                                        operands[0],
4751                                                        operands[1],
4752                                                        operands[2],
4753                                                        operands[3]));
4754   DONE;
4755 })
4756
4757 ;; <su><r>h<addsub>.
4758
4759 (define_expand "<su_optab>avg<mode>3_floor"
4760   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4761         (truncate:VDQ_BHSI
4762           (ashiftrt:<V2XWIDE>
4763             (plus:<V2XWIDE>
4764               (ANY_EXTEND:<V2XWIDE>
4765                 (match_operand:VDQ_BHSI 1 "register_operand"))
4766               (ANY_EXTEND:<V2XWIDE>
4767                 (match_operand:VDQ_BHSI 2 "register_operand")))
4768             (match_dup 3))))]
4769   "TARGET_SIMD"
4770   {
4771     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4772   }
4773 )
4774
4775 (define_expand "<su_optab>avg<mode>3_ceil"
4776   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4777         (truncate:VDQ_BHSI
4778           (ashiftrt:<V2XWIDE>
4779             (plus:<V2XWIDE>
4780               (plus:<V2XWIDE>
4781                 (ANY_EXTEND:<V2XWIDE>
4782                   (match_operand:VDQ_BHSI 1 "register_operand"))
4783                 (ANY_EXTEND:<V2XWIDE>
4784                   (match_operand:VDQ_BHSI 2 "register_operand")))
4785                (match_dup 3))
4786             (match_dup 3))))]
4787   "TARGET_SIMD"
4788   {
4789     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4790   }
4791 )
4792
4793 (define_expand "aarch64_<su>hsub<mode>"
4794   [(set (match_operand:VDQ_BHSI 0 "register_operand")
4795         (truncate:VDQ_BHSI
4796           (ashiftrt:<V2XWIDE>
4797             (minus:<V2XWIDE>
4798               (ANY_EXTEND:<V2XWIDE>
4799                 (match_operand:VDQ_BHSI 1 "register_operand"))
4800               (ANY_EXTEND:<V2XWIDE>
4801                 (match_operand:VDQ_BHSI 2 "register_operand")))
4802             (match_dup 3))))]
4803   "TARGET_SIMD"
4804   {
4805     operands[3] = CONST1_RTX (<V2XWIDE>mode);
4806   }
4807 )
4808
4809 (define_insn "*aarch64_<su>h<ADDSUB:optab><mode><vczle><vczbe>_insn"
4810   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4811         (truncate:VDQ_BHSI
4812           (ashiftrt:<V2XWIDE>
4813             (ADDSUB:<V2XWIDE>
4814               (ANY_EXTEND:<V2XWIDE>
4815                 (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4816               (ANY_EXTEND:<V2XWIDE>
4817                 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4818             (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))))]
4819   "TARGET_SIMD"
4820   "<su>h<ADDSUB:optab>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4821   [(set_attr "type" "neon_<ADDSUB:optab>_halve<q>")]
4822 )
4823
4824 (define_insn "*aarch64_<su>rhadd<mode><vczle><vczbe>_insn"
4825   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
4826         (truncate:VDQ_BHSI
4827           (ashiftrt:<V2XWIDE>
4828             (plus:<V2XWIDE>
4829               (plus:<V2XWIDE>
4830                 (ANY_EXTEND:<V2XWIDE>
4831                   (match_operand:VDQ_BHSI 1 "register_operand" "w"))
4832                 (ANY_EXTEND:<V2XWIDE>
4833                   (match_operand:VDQ_BHSI 2 "register_operand" "w")))
4834                (match_operand:<V2XWIDE> 3 "aarch64_simd_imm_one"))
4835             (match_dup 3))))]
4836   "TARGET_SIMD"
4837   "<su>rhadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4838   [(set_attr "type" "neon_add_halve<q>")]
4839 )
4840
4841 ;; <r><addsub>hn<q>.
4842
4843 (define_insn "aarch64_<optab>hn<mode>_insn<vczle><vczbe>"
4844   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4845         (truncate:<VNARROWQ>
4846           (ashiftrt:VQN
4847             (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4848                         (match_operand:VQN 2 "register_operand" "w"))
4849             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top"))))]
4850   "TARGET_SIMD"
4851   "<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4852   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4853 )
4854
4855 (define_insn "aarch64_r<optab>hn<mode>_insn<vczle><vczbe>"
4856   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4857         (truncate:<VNARROWQ>
4858           (ashiftrt:VQN
4859             (plus:VQN
4860               (ADDSUB:VQN (match_operand:VQN 1 "register_operand" "w")
4861                           (match_operand:VQN 2 "register_operand" "w"))
4862               (match_operand:VQN 3 "aarch64_simd_raddsubhn_imm_vec"))
4863             (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top"))))]
4864   "TARGET_SIMD"
4865   "r<optab>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
4866   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4867 )
4868
4869 (define_expand "aarch64_<optab>hn<mode>"
4870   [(set (match_operand:<VNARROWQ> 0 "register_operand")
4871         (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4872                     (match_operand:VQN 2 "register_operand")))]
4873   "TARGET_SIMD"
4874   {
4875     rtx shft
4876       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4877                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4878     emit_insn (gen_aarch64_<optab>hn<mode>_insn (operands[0], operands[1],
4879                                                  operands[2], shft));
4880     DONE;
4881   }
4882 )
4883
4884 (define_expand "aarch64_r<optab>hn<mode>"
4885   [(set (match_operand:<VNARROWQ> 0 "register_operand")
4886         (ADDSUB:VQN (match_operand:VQN 1 "register_operand")
4887                     (match_operand:VQN 2 "register_operand")))]
4888   "TARGET_SIMD"
4889   {
4890     rtx shft
4891       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4892                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4893     rtx rnd
4894       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4895         HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4896     emit_insn (gen_aarch64_r<optab>hn<mode>_insn (operands[0], operands[1],
4897                                                   operands[2], rnd, shft));
4898     DONE;
4899   }
4900 )
4901
4902 (define_insn "aarch64_<optab>hn2<mode>_insn_le"
4903   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4904         (vec_concat:<VNARROWQ2>
4905           (match_operand:<VNARROWQ> 1 "register_operand" "0")
4906           (truncate:<VNARROWQ>
4907             (ashiftrt:VQN
4908               (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4909                           (match_operand:VQN 3 "register_operand" "w"))
4910               (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))))]
4911   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4912   "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4913   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4914 )
4915
4916 (define_insn "aarch64_r<optab>hn2<mode>_insn_le"
4917   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4918         (vec_concat:<VNARROWQ2>
4919           (match_operand:<VNARROWQ> 1 "register_operand" "0")
4920           (truncate:<VNARROWQ>
4921             (ashiftrt:VQN
4922               (plus:VQN
4923                 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4924                             (match_operand:VQN 3 "register_operand" "w"))
4925                 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4926               (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))))]
4927   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
4928   "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4929   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4930 )
4931
4932 (define_insn "aarch64_<optab>hn2<mode>_insn_be"
4933   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4934         (vec_concat:<VNARROWQ2>
4935           (truncate:<VNARROWQ>
4936             (ashiftrt:VQN
4937               (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4938                           (match_operand:VQN 3 "register_operand" "w"))
4939               (match_operand:VQN 4 "aarch64_simd_shift_imm_vec_exact_top")))
4940           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4941   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4942   "<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4943   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4944 )
4945
4946 (define_insn "aarch64_r<optab>hn2<mode>_insn_be"
4947   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
4948         (vec_concat:<VNARROWQ2>
4949           (truncate:<VNARROWQ>
4950             (ashiftrt:VQN
4951               (plus:VQN
4952                 (ADDSUB:VQN (match_operand:VQN 2 "register_operand" "w")
4953                             (match_operand:VQN 3 "register_operand" "w"))
4954                 (match_operand:VQN 4 "aarch64_simd_raddsubhn_imm_vec"))
4955               (match_operand:VQN 5 "aarch64_simd_shift_imm_vec_exact_top")))
4956           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
4957   "TARGET_SIMD && BYTES_BIG_ENDIAN"
4958   "r<optab>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
4959   [(set_attr "type" "neon_<optab>_halve_narrow_q")]
4960 )
4961
4962 (define_expand "aarch64_<optab>hn2<mode>"
4963   [(match_operand:<VNARROWQ2> 0 "register_operand")
4964    (match_operand:<VNARROWQ> 1 "register_operand")
4965    (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
4966                (match_operand:VQN 3 "register_operand"))]
4967   "TARGET_SIMD"
4968   {
4969     rtx shft
4970       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4971                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4972     if (BYTES_BIG_ENDIAN)
4973       emit_insn (gen_aarch64_<optab>hn2<mode>_insn_be (operands[0],
4974                                 operands[1], operands[2], operands[3], shft));
4975     else
4976       emit_insn (gen_aarch64_<optab>hn2<mode>_insn_le (operands[0],
4977                                 operands[1], operands[2], operands[3], shft));
4978     DONE;
4979   }
4980 )
4981
4982 (define_expand "aarch64_r<optab>hn2<mode>"
4983   [(match_operand:<VNARROWQ2> 0 "register_operand")
4984    (match_operand:<VNARROWQ> 1 "register_operand")
4985    (ADDSUB:VQN (match_operand:VQN 2 "register_operand")
4986                (match_operand:VQN 3 "register_operand"))]
4987   "TARGET_SIMD"
4988   {
4989     rtx shft
4990       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4991                                 GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
4992     rtx rnd
4993       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
4994         HOST_WIDE_INT_1U << (GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2 - 1));
4995     if (BYTES_BIG_ENDIAN)
4996       emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_be (operands[0],
4997                                 operands[1], operands[2], operands[3], rnd, shft));
4998     else
4999       emit_insn (gen_aarch64_r<optab>hn2<mode>_insn_le (operands[0],
5000                                 operands[1], operands[2], operands[3], rnd, shft));
5001     DONE;
5002   }
5003 )
5004
5005 ;; Optimize ((a + b) >> n) + c where n is half the bitsize of the vector
5006 (define_insn_and_split "*bitmask_shift_plus<mode>"
5007   [(set (match_operand:VQN 0 "register_operand" "=&w")
5008         (plus:VQN
5009           (lshiftrt:VQN
5010             (plus:VQN (match_operand:VQN 1 "register_operand" "w")
5011                       (match_operand:VQN 2 "register_operand" "w"))
5012             (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_exact_top" ""))
5013           (match_operand:VQN 4 "register_operand" "w")))]
5014   "TARGET_SIMD"
5015   "#"
5016   "&& true"
5017   [(const_int 0)]
5018 {
5019   rtx tmp;
5020   if (can_create_pseudo_p ())
5021     tmp = gen_reg_rtx (<VNARROWQ>mode);
5022   else
5023     tmp = gen_rtx_REG (<VNARROWQ>mode, REGNO (operands[0]));
5024   emit_insn (gen_aarch64_addhn<mode> (tmp, operands[1], operands[2]));
5025   emit_insn (gen_aarch64_uaddw<Vnarrowq> (operands[0], operands[4], tmp));
5026   DONE;
5027 })
5028
5029 ;; pmul.
5030
5031 (define_insn "aarch64_pmul<mode>"
5032   [(set (match_operand:VB 0 "register_operand" "=w")
5033         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
5034                     (match_operand:VB 2 "register_operand" "w")]
5035                    UNSPEC_PMUL))]
5036  "TARGET_SIMD"
5037  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5038   [(set_attr "type" "neon_mul_<Vetype><q>")]
5039 )
5040
5041 (define_insn "aarch64_pmullv8qi"
5042   [(set (match_operand:V8HI 0 "register_operand" "=w")
5043         (unspec:V8HI [(match_operand:V8QI 1 "register_operand" "w")
5044                       (match_operand:V8QI 2 "register_operand" "w")]
5045                      UNSPEC_PMULL))]
5046  "TARGET_SIMD"
5047  "pmull\\t%0.8h, %1.8b, %2.8b"
5048   [(set_attr "type" "neon_mul_b_long")]
5049 )
5050
5051 (define_insn "aarch64_pmull_hiv16qi_insn"
5052   [(set (match_operand:V8HI 0 "register_operand" "=w")
5053         (unspec:V8HI
5054           [(vec_select:V8QI
5055              (match_operand:V16QI 1 "register_operand" "w")
5056              (match_operand:V16QI 3 "vect_par_cnst_hi_half" ""))
5057            (vec_select:V8QI
5058              (match_operand:V16QI 2 "register_operand" "w")
5059              (match_dup 3))]
5060           UNSPEC_PMULL))]
5061  "TARGET_SIMD"
5062  "pmull2\\t%0.8h, %1.16b, %2.16b"
5063   [(set_attr "type" "neon_mul_b_long")]
5064 )
5065
5066 (define_expand "aarch64_pmull_hiv16qi"
5067   [(match_operand:V8HI 0 "register_operand")
5068    (match_operand:V16QI 1 "register_operand")
5069    (match_operand:V16QI 2 "register_operand")]
5070  "TARGET_SIMD"
5071  {
5072    rtx p = aarch64_simd_vect_par_cnst_half (V16QImode, 16, true);
5073    emit_insn (gen_aarch64_pmull_hiv16qi_insn (operands[0], operands[1],
5074                                               operands[2], p));
5075    DONE;
5076  }
5077 )
5078
5079 ;; fmulx.
5080
5081 (define_insn "aarch64_fmulx<mode>"
5082   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5083         (unspec:VHSDF_HSDF
5084           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5085            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5086            UNSPEC_FMULX))]
5087  "TARGET_SIMD"
5088  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5089  [(set_attr "type" "neon_fp_mul_<stype>")]
5090 )
5091
5092 ;; vmulxq_lane_f32, and vmulx_laneq_f32
5093
5094 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
5095   [(set (match_operand:VDQSF 0 "register_operand" "=w")
5096         (unspec:VDQSF
5097          [(match_operand:VDQSF 1 "register_operand" "w")
5098           (vec_duplicate:VDQSF
5099            (vec_select:<VEL>
5100             (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
5101             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5102          UNSPEC_FMULX))]
5103   "TARGET_SIMD"
5104   {
5105     operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
5106     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5107   }
5108   [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
5109 )
5110
5111 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
5112
5113 (define_insn "*aarch64_mulx_elt<mode>"
5114   [(set (match_operand:VDQF 0 "register_operand" "=w")
5115         (unspec:VDQF
5116          [(match_operand:VDQF 1 "register_operand" "w")
5117           (vec_duplicate:VDQF
5118            (vec_select:<VEL>
5119             (match_operand:VDQF 2 "register_operand" "w")
5120             (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
5121          UNSPEC_FMULX))]
5122   "TARGET_SIMD"
5123   {
5124     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5125     return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
5126   }
5127   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
5128 )
5129
5130 ;; vmulxq_lane
5131
5132 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
5133   [(set (match_operand:VHSDF 0 "register_operand" "=w")
5134         (unspec:VHSDF
5135          [(match_operand:VHSDF 1 "register_operand" "w")
5136           (vec_duplicate:VHSDF
5137             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5138          UNSPEC_FMULX))]
5139   "TARGET_SIMD"
5140   "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
5141   [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
5142 )
5143
5144 ;; vmulxs_lane_f32, vmulxs_laneq_f32
5145 ;; vmulxd_lane_f64 ==  vmulx_lane_f64
5146 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
5147
5148 (define_insn "*aarch64_vgetfmulx<mode>"
5149   [(set (match_operand:<VEL> 0 "register_operand" "=w")
5150         (unspec:<VEL>
5151          [(match_operand:<VEL> 1 "register_operand" "w")
5152           (vec_select:<VEL>
5153            (match_operand:VDQF 2 "register_operand" "w")
5154             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5155          UNSPEC_FMULX))]
5156   "TARGET_SIMD"
5157   {
5158     operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
5159     return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
5160   }
5161   [(set_attr "type" "fmul<Vetype>")]
5162 )
5163 ;; <su>q<addsub>
5164
5165 (define_insn "<su_optab>s<addsub><mode>3<vczle><vczbe>"
5166   [(set (match_operand:VSDQ_I_QI_HI 0 "register_operand" "=w")
5167         (BINQOPS:VSDQ_I_QI_HI
5168           (match_operand:VSDQ_I_QI_HI 1 "register_operand" "w")
5169           (match_operand:VSDQ_I_QI_HI 2 "register_operand" "w")))]
5170   "TARGET_SIMD"
5171   "<su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5172   [(set_attr "type" "neon_q<addsub><q>")]
5173 )
5174
5175 (define_expand "<su_optab>s<addsub><mode>3"
5176   [(parallel
5177     [(set (match_operand:GPI 0 "register_operand")
5178           (SBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5179                         (match_operand:GPI 2 "aarch64_plus_operand")))
5180     (clobber (scratch:GPI))
5181     (clobber (reg:CC CC_REGNUM))])]
5182 )
5183
5184 ;; Introducing a temporary GP reg allows signed saturating arithmetic with GPR
5185 ;; operands to be calculated without the use of costly transfers to and from FP
5186 ;; registers.  For example, saturating addition usually uses three FMOVs:
5187 ;;
5188 ;;   fmov       d0, x0
5189 ;;   fmov       d1, x1
5190 ;;   sqadd      d0, d0, d1
5191 ;;   fmov       x0, d0
5192 ;;
5193 ;; Using a temporary register results in three cheaper instructions being used
5194 ;; in place of the three FMOVs, which calculate the saturating limit accounting
5195 ;; for the signedness of operand2:
5196 ;;
5197 ;;   asr        x2, x1, 63
5198 ;;   adds       x0, x0, x1
5199 ;;   eor        x2, x2, 0x8000000000000000
5200 ;;   csinv      x0, x0, x2, vc
5201 ;;
5202 ;; If operand2 is a constant value, the temporary register can be used to store
5203 ;; the saturating limit without the need for asr, xor to calculate said limit.
5204
5205 (define_insn_and_split "aarch64_<su_optab>s<addsub><mode>3<vczle><vczbe>"
5206   [(set (match_operand:GPI 0 "register_operand")
5207         (SBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5208                       (match_operand:GPI 2 "aarch64_plus_operand")))
5209     (clobber (match_scratch:GPI 3))
5210     (clobber (reg:CC CC_REGNUM))]
5211   ""
5212   {@ [ cons: =0, 1 , 2   , =3 ; attrs: type       , arch , length ]
5213      [ w       , w , w   , X  ; neon_q<addsub><q> , simd , 4      ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
5214      [ r       , r , JIr , &r ; *                 , *    , 8      ] #
5215   }
5216   "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
5217   [(set (match_dup 0)
5218         (if_then_else:GPI
5219           (match_dup 4)
5220           (match_dup 5)
5221           (match_dup 6)))]
5222   {
5223     if (REG_P (operands[2]))
5224       {
5225       rtx shift_constant = gen_int_mode (GET_MODE_BITSIZE (<MODE>mode) - 1,
5226                                          <MODE>mode);
5227       auto limit = HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (<MODE>mode) - 1);
5228       rtx limit_constant = gen_int_mode (limit, <MODE>mode);
5229       emit_insn (gen_ashr<mode>3 (operands[3], operands[2], shift_constant));
5230       emit_insn (gen_xor<mode>3 (operands[3], operands[3], limit_constant));
5231
5232       switch (<CODE>)
5233         {
5234         case SS_MINUS:
5235           emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
5236                                               operands[2]));
5237         break;
5238         case SS_PLUS:
5239           emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1],
5240                                               operands[2]));
5241           break;
5242         default:
5243           gcc_unreachable ();
5244         }
5245
5246       rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM);
5247       switch (<CODE>)
5248         {
5249         case SS_PLUS:
5250           operands[4] = gen_rtx_NE (<MODE>mode, ccin, const0_rtx);
5251           operands[5] = gen_rtx_NOT (<MODE>mode, operands[3]);
5252           operands[6] = operands[0];
5253           break;
5254         case SS_MINUS:
5255           operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx);
5256           operands[5] = operands[0];
5257           operands[6] = operands[3];
5258           break;
5259         default:
5260           gcc_unreachable ();
5261         }
5262       }
5263     else
5264       {
5265         auto imm = INTVAL (operands[2]);
5266         rtx neg_imm = gen_int_mode (-imm, <MODE>mode);
5267         wide_int limit;
5268
5269         switch (<CODE>)
5270           {
5271           case SS_MINUS:
5272             emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5273                                                     operands[2], neg_imm));
5274             limit = imm >= 0 ? wi::min_value (<MODE>mode, SIGNED)
5275                              : wi::max_value (<MODE>mode, SIGNED);
5276             break;
5277           case SS_PLUS:
5278             emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5279                                                     neg_imm, operands[2]));
5280             limit = imm >= 0 ? wi::max_value (<MODE>mode, SIGNED)
5281                              : wi::min_value (<MODE>mode, SIGNED);
5282             break;
5283           default:
5284             gcc_unreachable ();
5285           }
5286
5287       rtx sat_limit = immed_wide_int_const (limit, <MODE>mode);
5288       emit_insn (gen_rtx_SET (operands[3], sat_limit));
5289
5290       rtx ccin = gen_rtx_REG (E_CC_Vmode, CC_REGNUM);
5291       operands[4] = gen_rtx_EQ (<MODE>mode, ccin, const0_rtx);
5292       operands[5] = operands[0];
5293       operands[6] = operands[3];
5294       }
5295   }
5296 )
5297
5298 ;; Unsigned saturating arithmetic with GPR operands can be optimised similarly
5299 ;; to the signed case, albeit without the need for a temporary register as the
5300 ;; saturating limit can be inferred from the <addsub> code.  This applies only
5301 ;; to SImode and DImode.
5302
5303 (define_insn_and_split "<su_optab>s<addsub><mode>3<vczle><vczbe>"
5304   [(set (match_operand:GPI 0 "register_operand")
5305         (UBINQOPS:GPI (match_operand:GPI 1 "register_operand")
5306                       (match_operand:GPI 2 "aarch64_plus_operand")))
5307     (clobber (reg:CC CC_REGNUM))]
5308   ""
5309   {@ [ cons: =0, 1 , 2   ; attrs: type       , arch , length ]
5310      [ w       , w , w   ; neon_q<addsub><q> , simd , 4      ] <su_optab>q<addsub>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
5311      [ r       , r , JIr ; *                 , *    , 8      ] #
5312   }
5313   "&& reload_completed && GP_REGNUM_P (REGNO (operands[0]))"
5314   [(set (match_dup 0)
5315         (if_then_else:GPI
5316           (match_dup 3)
5317           (match_dup 0)
5318           (match_dup 4)))]
5319   {
5320
5321     if (REG_P (operands[2]))
5322       {
5323         switch (<CODE>)
5324           {
5325           case US_MINUS:
5326             emit_insn (gen_sub<mode>3_compare1 (operands[0], operands[1],
5327                                                 operands[2]));
5328             break;
5329           case US_PLUS:
5330             emit_insn (gen_add<mode>3_compare0 (operands[0], operands[1],
5331                                                 operands[2]));
5332             break;
5333           default:
5334             gcc_unreachable ();
5335           }
5336       }
5337     else
5338       {
5339         auto imm = UINTVAL (operands[2]);
5340         rtx neg_imm = gen_int_mode (-imm, <MODE>mode);
5341         switch (<CODE>)
5342           {
5343           case US_MINUS:
5344             emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5345                                                     operands[2], neg_imm));
5346             break;
5347           case US_PLUS:
5348             emit_insn (gen_sub<mode>3_compare1_imm (operands[0], operands[1],
5349                                                     neg_imm, operands[2]));
5350             break;
5351           default:
5352             gcc_unreachable ();
5353           }
5354       }
5355
5356     rtx ccin = gen_rtx_REG (CCmode, CC_REGNUM);
5357     switch (<CODE>)
5358       {
5359       case US_PLUS:
5360         operands[3] = gen_rtx_LTU (<MODE>mode, ccin, const0_rtx);
5361         operands[4] = gen_int_mode (-1, <MODE>mode);
5362         break;
5363       case US_MINUS:
5364         operands[3] = gen_rtx_GEU (<MODE>mode, ccin, const0_rtx);
5365         operands[4] = const0_rtx;
5366         break;
5367       default:
5368         gcc_unreachable ();
5369       }
5370   }
5371 )
5372
5373 ;; suqadd and usqadd
5374
5375 (define_insn "aarch64_<sur>qadd<mode><vczle><vczbe>"
5376   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5377         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
5378                         (match_operand:VSDQ_I 2 "register_operand" "w")]
5379                        USSUQADD))]
5380   "TARGET_SIMD"
5381   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
5382   [(set_attr "type" "neon_qadd<q>")]
5383 )
5384
5385 ;; sqmovn and uqmovn
5386
5387 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5388   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5389         (SAT_TRUNC:<VNARROWQ>
5390           (match_operand:SD_HSDI 1 "register_operand" "w")))]
5391   "TARGET_SIMD"
5392   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5393   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5394 )
5395
5396 (define_insn "aarch64_<su>qmovn<mode><vczle><vczbe>"
5397   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5398         (SAT_TRUNC:<VNARROWQ>
5399           (match_operand:VQN 1 "register_operand" "w")))]
5400   "TARGET_SIMD"
5401   "<su>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5402   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5403 )
5404
5405 (define_insn "aarch64_<su>qxtn2<mode>_le"
5406   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5407         (vec_concat:<VNARROWQ2>
5408           (match_operand:<VNARROWQ> 1 "register_operand" "0")
5409           (SAT_TRUNC:<VNARROWQ>
5410             (match_operand:VQN 2 "register_operand" "w"))))]
5411   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5412   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5413    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5414 )
5415
5416 (define_insn "aarch64_<su>qxtn2<mode>_be"
5417   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5418         (vec_concat:<VNARROWQ2>
5419           (SAT_TRUNC:<VNARROWQ>
5420             (match_operand:VQN 2 "register_operand" "w"))
5421           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5422   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5423   "<su>qxtn2\\t%0.<V2ntype>, %2.<Vtype>"
5424    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5425 )
5426
5427 (define_expand "aarch64_<su>qxtn2<mode>"
5428   [(match_operand:<VNARROWQ2> 0 "register_operand")
5429    (match_operand:<VNARROWQ> 1 "register_operand")
5430    (SAT_TRUNC:<VNARROWQ>
5431      (match_operand:VQN 2 "register_operand"))]
5432   "TARGET_SIMD"
5433   {
5434     if (BYTES_BIG_ENDIAN)
5435       emit_insn (gen_aarch64_<su>qxtn2<mode>_be (operands[0], operands[1],
5436                                                  operands[2]));
5437     else
5438       emit_insn (gen_aarch64_<su>qxtn2<mode>_le (operands[0], operands[1],
5439                                                  operands[2]));
5440     DONE;
5441   }
5442 )
5443
5444 ;; sqmovun
5445
5446 (define_insn "aarch64_sqmovun<mode>"
5447   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5448         (truncate:<VNARROWQ>
5449           (smin:SD_HSDI
5450             (smax:SD_HSDI
5451               (match_operand:SD_HSDI 1 "register_operand" "w")
5452               (const_int 0))
5453             (const_int <half_mask>))))]
5454    "TARGET_SIMD"
5455    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5456    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5457 )
5458
5459 (define_insn "*aarch64_sqmovun<mode>_insn<vczle><vczbe>"
5460   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5461         (truncate:<VNARROWQ>
5462           (smin:VQN
5463             (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5464                       (match_operand:VQN 2 "aarch64_simd_or_scalar_imm_zero"))
5465             (match_operand:VQN 3 "aarch64_simd_umax_half_mode"))))]
5466   "TARGET_SIMD"
5467   "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
5468   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5469 )
5470
5471 (define_expand "aarch64_sqmovun<mode>"
5472   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
5473         (truncate:<VNARROWQ>
5474           (smin:VQN
5475             (smax:VQN (match_operand:VQN 1 "register_operand" "w")
5476                       (match_dup 2))
5477             (match_dup 3))))]
5478   "TARGET_SIMD"
5479   {
5480     operands[2] = CONST0_RTX (<MODE>mode);
5481     operands[3]
5482       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5483                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5484   }
5485 )
5486
5487 (define_insn "aarch64_sqxtun2<mode>_le"
5488   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5489         (vec_concat:<VNARROWQ2>
5490           (match_operand:<VNARROWQ> 1 "register_operand" "0")
5491           (truncate:<VNARROWQ>
5492             (smin:VQN
5493               (smax:VQN
5494                 (match_operand:VQN 2 "register_operand" "w")
5495                 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5496               (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))))]
5497   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
5498   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5499    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5500 )
5501
5502 (define_insn "aarch64_sqxtun2<mode>_be"
5503   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
5504         (vec_concat:<VNARROWQ2>
5505           (truncate:<VNARROWQ>
5506             (smin:VQN
5507               (smax:VQN
5508                 (match_operand:VQN 2 "register_operand" "w")
5509                 (match_operand:VQN 3 "aarch64_simd_or_scalar_imm_zero"))
5510               (match_operand:VQN 4 "aarch64_simd_umax_half_mode")))
5511           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
5512   "TARGET_SIMD && BYTES_BIG_ENDIAN"
5513   "sqxtun2\\t%0.<V2ntype>, %2.<Vtype>"
5514    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
5515 )
5516
5517 (define_expand "aarch64_sqxtun2<mode>"
5518   [(match_operand:<VNARROWQ2> 0 "register_operand")
5519    (match_operand:<VNARROWQ> 1 "register_operand")
5520    (match_operand:VQN 2 "register_operand")]
5521   "TARGET_SIMD"
5522   {
5523     rtx zeros = CONST0_RTX (<MODE>mode);
5524     rtx half_umax = aarch64_simd_gen_const_vector_dup (<MODE>mode,
5525                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
5526     if (BYTES_BIG_ENDIAN)
5527       emit_insn (gen_aarch64_sqxtun2<mode>_be (operands[0], operands[1],
5528                                                operands[2], zeros, half_umax));
5529     else
5530       emit_insn (gen_aarch64_sqxtun2<mode>_le (operands[0], operands[1],
5531                                                operands[2], zeros, half_umax));
5532     DONE;
5533   }
5534 )
5535
5536 ;; <su>q<absneg>
5537
5538 (define_insn "aarch64_s<optab><mode><vczle><vczbe>"
5539   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
5540         (UNQOPS:VSDQ_I
5541           (match_operand:VSDQ_I 1 "register_operand" "w")))]
5542   "TARGET_SIMD"
5543   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5544   [(set_attr "type" "neon_<optab><q>")]
5545 )
5546
5547 ;; sq<r>dmulh.
5548
5549 (define_insn "aarch64_sq<r>dmulh<mode><vczle><vczbe>"
5550   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5551         (unspec:VSDQ_HSI
5552           [(match_operand:VSDQ_HSI 1 "register_operand" "w")
5553            (match_operand:VSDQ_HSI 2 "register_operand" "w")]
5554          VQDMULH))]
5555   "TARGET_SIMD"
5556   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5557   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
5558 )
5559
5560 (define_insn "aarch64_sq<r>dmulh_n<mode><vczle><vczbe>"
5561   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5562         (unspec:VDQHS
5563           [(match_operand:VDQHS 1 "register_operand" "w")
5564            (vec_duplicate:VDQHS
5565              (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
5566          VQDMULH))]
5567   "TARGET_SIMD"
5568   "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]"
5569   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5570 )
5571
5572 ;; sq<r>dmulh_lane
5573
5574 (define_insn "aarch64_sq<r>dmulh_lane<mode><vczle><vczbe>"
5575   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5576         (unspec:VDQHS
5577           [(match_operand:VDQHS 1 "register_operand" "w")
5578            (vec_select:<VEL>
5579              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5580              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5581          VQDMULH))]
5582   "TARGET_SIMD"
5583   "*
5584    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5585    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5586   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5587 )
5588
5589 (define_insn "aarch64_sq<r>dmulh_laneq<mode><vczle><vczbe>"
5590   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5591         (unspec:VDQHS
5592           [(match_operand:VDQHS 1 "register_operand" "w")
5593            (vec_select:<VEL>
5594              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5595              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5596          VQDMULH))]
5597   "TARGET_SIMD"
5598   "*
5599    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5600    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
5601   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5602 )
5603
5604 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
5605   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5606         (unspec:SD_HSI
5607           [(match_operand:SD_HSI 1 "register_operand" "w")
5608            (vec_select:<VEL>
5609              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
5610              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5611          VQDMULH))]
5612   "TARGET_SIMD"
5613   "*
5614    operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
5615    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5616   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5617 )
5618
5619 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
5620   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5621         (unspec:SD_HSI
5622           [(match_operand:SD_HSI 1 "register_operand" "w")
5623            (vec_select:<VEL>
5624              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
5625              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
5626          VQDMULH))]
5627   "TARGET_SIMD"
5628   "*
5629    operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
5630    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
5631   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
5632 )
5633
5634 ;; sqrdml[as]h.
5635
5636 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode><vczle><vczbe>"
5637   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
5638         (unspec:VSDQ_HSI
5639           [(match_operand:VSDQ_HSI 1 "register_operand" "0")
5640            (match_operand:VSDQ_HSI 2 "register_operand" "w")
5641            (match_operand:VSDQ_HSI 3 "register_operand" "w")]
5642           SQRDMLH_AS))]
5643    "TARGET_SIMD_RDMA"
5644    "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5645    [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5646 )
5647
5648 ;; sqrdml[as]h_lane.
5649
5650 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5651   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5652         (unspec:VDQHS
5653           [(match_operand:VDQHS 1 "register_operand" "0")
5654            (match_operand:VDQHS 2 "register_operand" "w")
5655            (vec_select:<VEL>
5656              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5657              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5658           SQRDMLH_AS))]
5659    "TARGET_SIMD_RDMA"
5660    {
5661      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5662      return
5663       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5664    }
5665    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5666 )
5667
5668 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode><vczle><vczbe>"
5669   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5670         (unspec:SD_HSI
5671           [(match_operand:SD_HSI 1 "register_operand" "0")
5672            (match_operand:SD_HSI 2 "register_operand" "w")
5673            (vec_select:<VEL>
5674              (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5675              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5676           SQRDMLH_AS))]
5677    "TARGET_SIMD_RDMA"
5678    {
5679      operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5680      return
5681       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
5682    }
5683    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5684 )
5685
5686 ;; sqrdml[as]h_laneq.
5687
5688 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5689   [(set (match_operand:VDQHS 0 "register_operand" "=w")
5690         (unspec:VDQHS
5691           [(match_operand:VDQHS 1 "register_operand" "0")
5692            (match_operand:VDQHS 2 "register_operand" "w")
5693            (vec_select:<VEL>
5694              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5695              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5696           SQRDMLH_AS))]
5697    "TARGET_SIMD_RDMA"
5698    {
5699      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5700      return
5701       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
5702    }
5703    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5704 )
5705
5706 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode><vczle><vczbe>"
5707   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
5708         (unspec:SD_HSI
5709           [(match_operand:SD_HSI 1 "register_operand" "0")
5710            (match_operand:SD_HSI 2 "register_operand" "w")
5711            (vec_select:<VEL>
5712              (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5713              (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
5714           SQRDMLH_AS))]
5715    "TARGET_SIMD_RDMA"
5716    {
5717      operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5718      return
5719       "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
5720    }
5721    [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5722 )
5723
5724 ;; vqdml[sa]l
5725
5726 (define_insn "aarch64_sqdmlal<mode>"
5727   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5728         (ss_plus:<VWIDE>
5729           (ss_ashift:<VWIDE>
5730               (mult:<VWIDE>
5731                 (sign_extend:<VWIDE>
5732                       (match_operand:VSD_HSI 2 "register_operand" "w"))
5733                 (sign_extend:<VWIDE>
5734                       (match_operand:VSD_HSI 3 "register_operand" "w")))
5735               (const_int 1))
5736           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5737   "TARGET_SIMD"
5738   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5739   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5740 )
5741
5742 (define_insn "aarch64_sqdmlsl<mode>"
5743   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5744         (ss_minus:<VWIDE>
5745           (match_operand:<VWIDE> 1 "register_operand" "0")
5746           (ss_ashift:<VWIDE>
5747               (mult:<VWIDE>
5748                 (sign_extend:<VWIDE>
5749                       (match_operand:VSD_HSI 2 "register_operand" "w"))
5750                 (sign_extend:<VWIDE>
5751                       (match_operand:VSD_HSI 3 "register_operand" "w")))
5752               (const_int 1))))]
5753   "TARGET_SIMD"
5754   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
5755   [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
5756 )
5757
5758 ;; vqdml[sa]l_lane
5759
5760 (define_insn "aarch64_sqdmlal_lane<mode>"
5761   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5762         (ss_plus:<VWIDE>
5763           (ss_ashift:<VWIDE>
5764             (mult:<VWIDE>
5765               (sign_extend:<VWIDE>
5766                 (match_operand:VD_HSI 2 "register_operand" "w"))
5767               (vec_duplicate:<VWIDE>
5768                 (sign_extend:<VWIDE_S>
5769                   (vec_select:<VEL>
5770                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5771                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5772               ))
5773             (const_int 1))
5774           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5775   "TARGET_SIMD"
5776   {
5777     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5778     return
5779       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5780   }
5781   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5782 )
5783
5784 (define_insn "aarch64_sqdmlsl_lane<mode>"
5785   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5786         (ss_minus:<VWIDE>
5787           (match_operand:<VWIDE> 1 "register_operand" "0")
5788           (ss_ashift:<VWIDE>
5789             (mult:<VWIDE>
5790               (sign_extend:<VWIDE>
5791                 (match_operand:VD_HSI 2 "register_operand" "w"))
5792               (vec_duplicate:<VWIDE>
5793                 (sign_extend:<VWIDE_S>
5794                   (vec_select:<VEL>
5795                     (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5796                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5797               ))
5798             (const_int 1))))]
5799   "TARGET_SIMD"
5800   {
5801     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5802     return
5803       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5804   }
5805   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5806 )
5807
5808
5809 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5810   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5811         (ss_minus:<VWIDE>
5812           (match_operand:<VWIDE> 1 "register_operand" "0")
5813           (ss_ashift:<VWIDE>
5814             (mult:<VWIDE>
5815               (sign_extend:<VWIDE>
5816                 (match_operand:VD_HSI 2 "register_operand" "w"))
5817               (vec_duplicate:<VWIDE>
5818                 (sign_extend:<VWIDE_S>
5819                   (vec_select:<VEL>
5820                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5821                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5822               ))
5823             (const_int 1))))]
5824   "TARGET_SIMD"
5825   {
5826     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5827     return
5828       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5829   }
5830   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5831 )
5832
5833 (define_insn "aarch64_sqdmlal_laneq<mode>"
5834   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5835         (ss_plus:<VWIDE>
5836           (ss_ashift:<VWIDE>
5837             (mult:<VWIDE>
5838               (sign_extend:<VWIDE>
5839                 (match_operand:VD_HSI 2 "register_operand" "w"))
5840               (vec_duplicate:<VWIDE>
5841                 (sign_extend:<VWIDE_S>
5842                   (vec_select:<VEL>
5843                     (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5844                     (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5845               ))
5846             (const_int 1))
5847           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5848   "TARGET_SIMD"
5849   {
5850     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5851     return
5852       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5853   }
5854   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5855 )
5856
5857
5858 (define_insn "aarch64_sqdmlal_lane<mode>"
5859   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5860         (ss_plus:<VWIDE>
5861           (ss_ashift:<VWIDE>
5862             (mult:<VWIDE>
5863               (sign_extend:<VWIDE>
5864                 (match_operand:SD_HSI 2 "register_operand" "w"))
5865               (sign_extend:<VWIDE>
5866                 (vec_select:<VEL>
5867                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5868                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5869               )
5870             (const_int 1))
5871           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5872   "TARGET_SIMD"
5873   {
5874     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5875     return
5876       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5877   }
5878   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5879 )
5880
5881 (define_insn "aarch64_sqdmlsl_lane<mode>"
5882   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5883         (ss_minus:<VWIDE>
5884           (match_operand:<VWIDE> 1 "register_operand" "0")
5885           (ss_ashift:<VWIDE>
5886             (mult:<VWIDE>
5887               (sign_extend:<VWIDE>
5888                 (match_operand:SD_HSI 2 "register_operand" "w"))
5889               (sign_extend:<VWIDE>
5890                 (vec_select:<VEL>
5891                   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
5892                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5893               )
5894             (const_int 1))))]
5895   "TARGET_SIMD"
5896   {
5897     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
5898     return
5899       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5900   }
5901   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5902 )
5903
5904
5905 (define_insn "aarch64_sqdmlal_laneq<mode>"
5906   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5907         (ss_plus:<VWIDE>
5908           (ss_ashift:<VWIDE>
5909             (mult:<VWIDE>
5910               (sign_extend:<VWIDE>
5911                 (match_operand:SD_HSI 2 "register_operand" "w"))
5912               (sign_extend:<VWIDE>
5913                 (vec_select:<VEL>
5914                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5915                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5916               )
5917             (const_int 1))
5918           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5919   "TARGET_SIMD"
5920   {
5921     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5922     return
5923       "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5924   }
5925   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5926 )
5927
5928 (define_insn "aarch64_sqdmlsl_laneq<mode>"
5929   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5930         (ss_minus:<VWIDE>
5931           (match_operand:<VWIDE> 1 "register_operand" "0")
5932           (ss_ashift:<VWIDE>
5933             (mult:<VWIDE>
5934               (sign_extend:<VWIDE>
5935                 (match_operand:SD_HSI 2 "register_operand" "w"))
5936               (sign_extend:<VWIDE>
5937                 (vec_select:<VEL>
5938                   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
5939                   (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
5940               )
5941             (const_int 1))))]
5942   "TARGET_SIMD"
5943   {
5944     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
5945     return
5946       "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
5947   }
5948   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5949 )
5950
5951 ;; vqdml[sa]l_n
5952
5953 (define_insn "aarch64_sqdmlsl_n<mode>"
5954   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5955         (ss_minus:<VWIDE>
5956           (match_operand:<VWIDE> 1 "register_operand" "0")
5957           (ss_ashift:<VWIDE>
5958               (mult:<VWIDE>
5959                 (sign_extend:<VWIDE>
5960                       (match_operand:VD_HSI 2 "register_operand" "w"))
5961                 (vec_duplicate:<VWIDE>
5962                   (sign_extend:<VWIDE_S>
5963                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5964               (const_int 1))))]
5965   "TARGET_SIMD"
5966   "sqdmlsl\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5967   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5968 )
5969
5970 (define_insn "aarch64_sqdmlal_n<mode>"
5971   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5972         (ss_plus:<VWIDE>
5973           (ss_ashift:<VWIDE>
5974               (mult:<VWIDE>
5975                 (sign_extend:<VWIDE>
5976                       (match_operand:VD_HSI 2 "register_operand" "w"))
5977                 (vec_duplicate:<VWIDE>
5978                   (sign_extend:<VWIDE_S>
5979                     (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
5980               (const_int 1))
5981           (match_operand:<VWIDE> 1 "register_operand" "0")))]
5982   "TARGET_SIMD"
5983   "sqdmlal\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
5984   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
5985 )
5986
5987
5988 ;; sqdml[as]l2
5989
5990 (define_insn "aarch64_sqdmlal2<mode>_internal"
5991   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
5992         (ss_plus:<VWIDE>
5993          (ss_ashift:<VWIDE>
5994              (mult:<VWIDE>
5995                (sign_extend:<VWIDE>
5996                  (vec_select:<VHALF>
5997                      (match_operand:VQ_HSI 2 "register_operand" "w")
5998                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
5999                (sign_extend:<VWIDE>
6000                  (vec_select:<VHALF>
6001                      (match_operand:VQ_HSI 3 "register_operand" "w")
6002                      (match_dup 4))))
6003              (const_int 1))
6004           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6005   "TARGET_SIMD"
6006   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
6007   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6008 )
6009
6010 (define_insn "aarch64_sqdmlsl2<mode>_internal"
6011   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6012         (ss_minus:<VWIDE>
6013          (match_operand:<VWIDE> 1 "register_operand" "0")
6014          (ss_ashift:<VWIDE>
6015              (mult:<VWIDE>
6016                (sign_extend:<VWIDE>
6017                  (vec_select:<VHALF>
6018                      (match_operand:VQ_HSI 2 "register_operand" "w")
6019                      (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6020                (sign_extend:<VWIDE>
6021                  (vec_select:<VHALF>
6022                      (match_operand:VQ_HSI 3 "register_operand" "w")
6023                      (match_dup 4))))
6024              (const_int 1))))]
6025   "TARGET_SIMD"
6026   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
6027   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6028 )
6029
6030 (define_expand "aarch64_sqdml<SBINQOPS:as>l2<mode>"
6031   [(match_operand:<VWIDE> 0 "register_operand")
6032    (SBINQOPS:<VWIDE>
6033      (match_operand:<VWIDE> 1 "register_operand")
6034      (match_dup 1))
6035    (match_operand:VQ_HSI 2 "register_operand")
6036    (match_operand:VQ_HSI 3 "register_operand")]
6037   "TARGET_SIMD"
6038 {
6039   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6040   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2<mode>_internal (operands[0],
6041                                                 operands[1], operands[2],
6042                                                 operands[3], p));
6043   DONE;
6044 })
6045
6046 ;; vqdml[sa]l2_lane
6047
6048 (define_insn "aarch64_sqdmlsl2_lane<mode>_internal"
6049   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6050         (ss_minus:<VWIDE>
6051           (match_operand:<VWIDE> 1 "register_operand" "0")
6052           (ss_ashift:<VWIDE>
6053               (mult:<VWIDE>
6054                 (sign_extend:<VWIDE>
6055                   (vec_select:<VHALF>
6056                     (match_operand:VQ_HSI 2 "register_operand" "w")
6057                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6058                 (vec_duplicate:<VWIDE>
6059                   (sign_extend:<VWIDE_S>
6060                     (vec_select:<VEL>
6061                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
6062                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6063                     ))))
6064               (const_int 1))))]
6065   "TARGET_SIMD"
6066   {
6067     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
6068     return
6069      "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6070   }
6071   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6072 )
6073
6074 (define_insn "aarch64_sqdmlal2_lane<mode>_internal"
6075   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6076         (ss_plus:<VWIDE>
6077           (ss_ashift:<VWIDE>
6078               (mult:<VWIDE>
6079                 (sign_extend:<VWIDE>
6080                   (vec_select:<VHALF>
6081                     (match_operand:VQ_HSI 2 "register_operand" "w")
6082                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6083                 (vec_duplicate:<VWIDE>
6084                   (sign_extend:<VWIDE_S>
6085                     (vec_select:<VEL>
6086                       (match_operand:<VCOND> 3 "register_operand" "<vwx>")
6087                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6088                     ))))
6089               (const_int 1))
6090           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6091   "TARGET_SIMD"
6092   {
6093     operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
6094     return
6095      "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6096   }
6097   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6098 )
6099
6100 (define_insn "aarch64_sqdmlsl2_laneq<mode>_internal"
6101   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6102         (ss_minus:<VWIDE>
6103           (match_operand:<VWIDE> 1 "register_operand" "0")
6104           (ss_ashift:<VWIDE>
6105               (mult:<VWIDE>
6106                 (sign_extend:<VWIDE>
6107                   (vec_select:<VHALF>
6108                     (match_operand:VQ_HSI 2 "register_operand" "w")
6109                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6110                 (vec_duplicate:<VWIDE>
6111                   (sign_extend:<VWIDE_S>
6112                     (vec_select:<VEL>
6113                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
6114                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6115                     ))))
6116               (const_int 1))))]
6117   "TARGET_SIMD"
6118   {
6119     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
6120     return
6121      "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6122   }
6123   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6124 )
6125
6126 (define_insn "aarch64_sqdmlal2_laneq<mode>_internal"
6127   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6128         (ss_plus:<VWIDE>
6129           (ss_ashift:<VWIDE>
6130               (mult:<VWIDE>
6131                 (sign_extend:<VWIDE>
6132                   (vec_select:<VHALF>
6133                     (match_operand:VQ_HSI 2 "register_operand" "w")
6134                     (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
6135                 (vec_duplicate:<VWIDE>
6136                   (sign_extend:<VWIDE_S>
6137                     (vec_select:<VEL>
6138                       (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
6139                       (parallel [(match_operand:SI 4 "immediate_operand" "i")])
6140                     ))))
6141               (const_int 1))
6142           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6143   "TARGET_SIMD"
6144   {
6145     operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
6146     return
6147      "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
6148   }
6149   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6150 )
6151
6152 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>"
6153   [(match_operand:<VWIDE> 0 "register_operand")
6154    (SBINQOPS:<VWIDE>
6155      (match_operand:<VWIDE> 1 "register_operand")
6156      (match_dup 1))
6157    (match_operand:VQ_HSI 2 "register_operand")
6158    (match_operand:<VCOND> 3 "register_operand")
6159    (match_operand:SI 4 "immediate_operand")]
6160   "TARGET_SIMD"
6161 {
6162   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6163   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal (operands[0],
6164                                                 operands[1], operands[2],
6165                                                 operands[3], operands[4], p));
6166   DONE;
6167 })
6168
6169 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>"
6170   [(match_operand:<VWIDE> 0 "register_operand")
6171    (SBINQOPS:<VWIDE>
6172      (match_operand:<VWIDE> 1 "register_operand")
6173      (match_dup 1))
6174    (match_operand:VQ_HSI 2 "register_operand")
6175    (match_operand:<VCONQ> 3 "register_operand")
6176    (match_operand:SI 4 "immediate_operand")]
6177   "TARGET_SIMD"
6178 {
6179   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6180   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal (operands[0],
6181                                                 operands[1], operands[2],
6182                                                 operands[3], operands[4], p));
6183   DONE;
6184 })
6185
6186 (define_insn "aarch64_sqdmlsl2_n<mode>_internal"
6187   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6188         (ss_minus:<VWIDE>
6189           (match_operand:<VWIDE> 1 "register_operand" "0")
6190           (ss_ashift:<VWIDE>
6191             (mult:<VWIDE>
6192               (sign_extend:<VWIDE>
6193                 (vec_select:<VHALF>
6194                   (match_operand:VQ_HSI 2 "register_operand" "w")
6195                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6196               (vec_duplicate:<VWIDE>
6197                 (sign_extend:<VWIDE_S>
6198                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6199             (const_int 1))))]
6200   "TARGET_SIMD"
6201   "sqdmlsl2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6202   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6203 )
6204
6205 (define_insn "aarch64_sqdmlal2_n<mode>_internal"
6206   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6207         (ss_plus:<VWIDE>
6208           (ss_ashift:<VWIDE>
6209             (mult:<VWIDE>
6210               (sign_extend:<VWIDE>
6211                 (vec_select:<VHALF>
6212                   (match_operand:VQ_HSI 2 "register_operand" "w")
6213                   (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6214               (vec_duplicate:<VWIDE>
6215                 (sign_extend:<VWIDE_S>
6216                   (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
6217             (const_int 1))
6218           (match_operand:<VWIDE> 1 "register_operand" "0")))]
6219   "TARGET_SIMD"
6220   "sqdmlal2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
6221   [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
6222 )
6223
6224 (define_expand "aarch64_sqdml<SBINQOPS:as>l2_n<mode>"
6225   [(match_operand:<VWIDE> 0 "register_operand")
6226    (SBINQOPS:<VWIDE>
6227      (match_operand:<VWIDE> 1 "register_operand")
6228      (match_dup 1))
6229    (match_operand:VQ_HSI 2 "register_operand")
6230    (match_operand:<VEL> 3 "register_operand")]
6231   "TARGET_SIMD"
6232 {
6233   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6234   emit_insn (gen_aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal (operands[0],
6235                                                 operands[1], operands[2],
6236                                                 operands[3], p));
6237   DONE;
6238 })
6239
6240 ;; vqdmull
6241
6242 (define_insn "aarch64_sqdmull<mode>"
6243   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6244         (ss_ashift:<VWIDE>
6245              (mult:<VWIDE>
6246                (sign_extend:<VWIDE>
6247                      (match_operand:VSD_HSI 1 "register_operand" "w"))
6248                (sign_extend:<VWIDE>
6249                      (match_operand:VSD_HSI 2 "register_operand" "w")))
6250              (const_int 1)))]
6251   "TARGET_SIMD"
6252   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6253   [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
6254 )
6255
6256 ;; vqdmull_lane
6257
6258 (define_insn "aarch64_sqdmull_lane<mode>"
6259   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6260         (ss_ashift:<VWIDE>
6261              (mult:<VWIDE>
6262                (sign_extend:<VWIDE>
6263                  (match_operand:VD_HSI 1 "register_operand" "w"))
6264                (vec_duplicate:<VWIDE>
6265                  (sign_extend:<VWIDE_S>
6266                    (vec_select:<VEL>
6267                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6268                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6269                ))
6270              (const_int 1)))]
6271   "TARGET_SIMD"
6272   {
6273     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6274     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6275   }
6276   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6277 )
6278
6279 (define_insn "aarch64_sqdmull_laneq<mode>"
6280   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6281         (ss_ashift:<VWIDE>
6282              (mult:<VWIDE>
6283                (sign_extend:<VWIDE>
6284                  (match_operand:VD_HSI 1 "register_operand" "w"))
6285                (vec_duplicate:<VWIDE>
6286                  (sign_extend:<VWIDE_S>
6287                    (vec_select:<VEL>
6288                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6289                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6290                ))
6291              (const_int 1)))]
6292   "TARGET_SIMD"
6293   {
6294     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6295     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6296   }
6297   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6298 )
6299
6300 (define_insn "aarch64_sqdmull_lane<mode>"
6301   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6302         (ss_ashift:<VWIDE>
6303              (mult:<VWIDE>
6304                (sign_extend:<VWIDE>
6305                  (match_operand:SD_HSI 1 "register_operand" "w"))
6306                (sign_extend:<VWIDE>
6307                  (vec_select:<VEL>
6308                    (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6309                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6310                ))
6311              (const_int 1)))]
6312   "TARGET_SIMD"
6313   {
6314     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6315     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6316   }
6317   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6318 )
6319
6320 (define_insn "aarch64_sqdmull_laneq<mode>"
6321   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6322         (ss_ashift:<VWIDE>
6323              (mult:<VWIDE>
6324                (sign_extend:<VWIDE>
6325                  (match_operand:SD_HSI 1 "register_operand" "w"))
6326                (sign_extend:<VWIDE>
6327                  (vec_select:<VEL>
6328                    (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6329                    (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
6330                ))
6331              (const_int 1)))]
6332   "TARGET_SIMD"
6333   {
6334     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6335     return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6336   }
6337   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6338 )
6339
6340 ;; vqdmull_n
6341
6342 (define_insn "aarch64_sqdmull_n<mode>"
6343   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6344         (ss_ashift:<VWIDE>
6345              (mult:<VWIDE>
6346                (sign_extend:<VWIDE>
6347                  (match_operand:VD_HSI 1 "register_operand" "w"))
6348                (vec_duplicate:<VWIDE>
6349                  (sign_extend:<VWIDE_S>
6350                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6351                )
6352              (const_int 1)))]
6353   "TARGET_SIMD"
6354   "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6355   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6356 )
6357
6358 ;; vqdmull2
6359
6360 (define_insn "aarch64_sqdmull2<mode>_internal"
6361   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6362         (ss_ashift:<VWIDE>
6363              (mult:<VWIDE>
6364                (sign_extend:<VWIDE>
6365                  (vec_select:<VHALF>
6366                    (match_operand:VQ_HSI 1 "register_operand" "w")
6367                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6368                (sign_extend:<VWIDE>
6369                  (vec_select:<VHALF>
6370                    (match_operand:VQ_HSI 2 "register_operand" "w")
6371                    (match_dup 3)))
6372                )
6373              (const_int 1)))]
6374   "TARGET_SIMD"
6375   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
6376   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6377 )
6378
6379 (define_expand "aarch64_sqdmull2<mode>"
6380   [(match_operand:<VWIDE> 0 "register_operand")
6381    (match_operand:VQ_HSI 1 "register_operand")
6382    (match_operand:VQ_HSI 2 "register_operand")]
6383   "TARGET_SIMD"
6384 {
6385   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6386   emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
6387                                                   operands[2], p));
6388   DONE;
6389 })
6390
6391 ;; vqdmull2_lane
6392
6393 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
6394   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6395         (ss_ashift:<VWIDE>
6396              (mult:<VWIDE>
6397                (sign_extend:<VWIDE>
6398                  (vec_select:<VHALF>
6399                    (match_operand:VQ_HSI 1 "register_operand" "w")
6400                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6401                (vec_duplicate:<VWIDE>
6402                  (sign_extend:<VWIDE_S>
6403                    (vec_select:<VEL>
6404                      (match_operand:<VCOND> 2 "register_operand" "<vwx>")
6405                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6406                ))
6407              (const_int 1)))]
6408   "TARGET_SIMD"
6409   {
6410     operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
6411     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6412   }
6413   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6414 )
6415
6416 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
6417   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6418         (ss_ashift:<VWIDE>
6419              (mult:<VWIDE>
6420                (sign_extend:<VWIDE>
6421                  (vec_select:<VHALF>
6422                    (match_operand:VQ_HSI 1 "register_operand" "w")
6423                    (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
6424                (vec_duplicate:<VWIDE>
6425                  (sign_extend:<VWIDE_S>
6426                    (vec_select:<VEL>
6427                      (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
6428                      (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
6429                ))
6430              (const_int 1)))]
6431   "TARGET_SIMD"
6432   {
6433     operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
6434     return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
6435   }
6436   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6437 )
6438
6439 (define_expand "aarch64_sqdmull2_lane<mode>"
6440   [(match_operand:<VWIDE> 0 "register_operand")
6441    (match_operand:VQ_HSI 1 "register_operand")
6442    (match_operand:<VCOND> 2 "register_operand")
6443    (match_operand:SI 3 "immediate_operand")]
6444   "TARGET_SIMD"
6445 {
6446   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6447   emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
6448                                                        operands[2], operands[3],
6449                                                        p));
6450   DONE;
6451 })
6452
6453 (define_expand "aarch64_sqdmull2_laneq<mode>"
6454   [(match_operand:<VWIDE> 0 "register_operand")
6455    (match_operand:VQ_HSI 1 "register_operand")
6456    (match_operand:<VCONQ> 2 "register_operand")
6457    (match_operand:SI 3 "immediate_operand")]
6458   "TARGET_SIMD"
6459 {
6460   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6461   emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
6462                                                        operands[2], operands[3],
6463                                                        p));
6464   DONE;
6465 })
6466
6467 ;; vqdmull2_n
6468
6469 (define_insn "aarch64_sqdmull2_n<mode>_internal"
6470   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
6471         (ss_ashift:<VWIDE>
6472              (mult:<VWIDE>
6473                (sign_extend:<VWIDE>
6474                  (vec_select:<VHALF>
6475                    (match_operand:VQ_HSI 1 "register_operand" "w")
6476                    (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
6477                (vec_duplicate:<VWIDE>
6478                  (sign_extend:<VWIDE_S>
6479                    (match_operand:<VEL> 2 "register_operand" "<vwx>")))
6480                )
6481              (const_int 1)))]
6482   "TARGET_SIMD"
6483   "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
6484   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
6485 )
6486
6487 (define_expand "aarch64_sqdmull2_n<mode>"
6488   [(match_operand:<VWIDE> 0 "register_operand")
6489    (match_operand:VQ_HSI 1 "register_operand")
6490    (match_operand:<VEL> 2 "register_operand")]
6491   "TARGET_SIMD"
6492 {
6493   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6494   emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
6495                                                     operands[2], p));
6496   DONE;
6497 })
6498
6499 ;; vshl
6500
6501 (define_insn "aarch64_<sur>shl<mode><vczle><vczbe>"
6502   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6503         (unspec:VSDQ_I_DI
6504           [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
6505            (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
6506          VSHL))]
6507   "TARGET_SIMD"
6508   "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6509   [(set_attr "type" "neon_shift_reg<q>")]
6510 )
6511
6512
6513 ;; vqshl
6514
6515 (define_insn "aarch64_<sur>q<r>shl<mode><vczle><vczbe>"
6516   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6517         (unspec:VSDQ_I
6518           [(match_operand:VSDQ_I 1 "register_operand" "w")
6519            (match_operand:VSDQ_I 2 "register_operand" "w")]
6520          VQSHL))]
6521   "TARGET_SIMD"
6522   "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
6523   [(set_attr "type" "neon_sat_shift_reg<q>")]
6524 )
6525
6526 ;; vshll_n
6527
6528 (define_insn "aarch64_<su>shll<mode>"
6529   [(set (match_operand:<VWIDE> 0 "register_operand")
6530         (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6531                             (match_operand:VD_BHSI 1 "register_operand"))
6532                          (match_operand:<VWIDE> 2
6533                            "aarch64_simd_shll_imm_vec")))]
6534   "TARGET_SIMD"
6535   {@ [cons: =0, 1, 2]
6536      [w, w, D2] shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6537      [w, w, DL] <su>shll\t%0.<Vwtype>, %1.<Vtype>, %I2
6538   }
6539   [(set_attr "type" "neon_shift_imm_long")]
6540 )
6541
6542 (define_expand "aarch64_<sur>shll_n<mode>"
6543   [(set (match_operand:<VWIDE> 0 "register_operand")
6544         (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand")
6545                          (match_operand:SI 2
6546                            "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6547                          VSHLL))]
6548   "TARGET_SIMD"
6549   {
6550     rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6551     emit_insn (gen_aarch64_<sur>shll<mode> (operands[0], operands[1], shft));
6552     DONE;
6553   }
6554 )
6555
6556 ;; vshll_high_n
6557
6558 (define_insn "aarch64_<su>shll2<mode>"
6559   [(set (match_operand:<VWIDE> 0 "register_operand")
6560         (ashift:<VWIDE> (ANY_EXTEND:<VWIDE>
6561                           (vec_select:<VHALF>
6562                             (match_operand:VQW 1 "register_operand")
6563                             (match_operand:VQW 2 "vect_par_cnst_hi_half")))
6564                          (match_operand:<VWIDE> 3
6565                            "aarch64_simd_shll_imm_vec")))]
6566   "TARGET_SIMD"
6567   {@ [cons: =0, 1, 2, 3]
6568      [w, w, , D2] shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6569      [w, w, , DL] <su>shll2\t%0.<Vwtype>, %1.<Vtype>, %I3
6570   }
6571   [(set_attr "type" "neon_shift_imm_long")]
6572 )
6573
6574 (define_expand "aarch64_<sur>shll2_n<mode>"
6575   [(set (match_operand:<VWIDE> 0 "register_operand")
6576         (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand")
6577                          (match_operand:SI 2
6578                            "aarch64_simd_shift_imm_bitsize_<ve_mode>")]
6579                          VSHLL))]
6580   "TARGET_SIMD"
6581   {
6582     rtx shft = gen_const_vec_duplicate (<VWIDE>mode, operands[2]);
6583     rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
6584     emit_insn (gen_aarch64_<sur>shll2<mode> (operands[0], operands[1], p, shft));
6585     DONE;
6586   }
6587 )
6588
6589 ;; vrshr_n
6590
6591 (define_insn "aarch64_<sra_op>rshr_n<mode><vczle><vczbe>_insn"
6592   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6593         (truncate:VSDQ_I_DI
6594           (SHIFTRT:<V2XWIDE>
6595             (plus:<V2XWIDE>
6596               (<SHIFTEXTEND>:<V2XWIDE>
6597                 (match_operand:VSDQ_I_DI 1 "register_operand" "w"))
6598               (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6599             (match_operand:VSDQ_I_DI 2 "aarch64_simd_shift_imm_<vec_or_offset>_<Vel>"))))]
6600   "TARGET_SIMD
6601    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6602   "<sra_op>rshr\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6603   [(set_attr "type" "neon_sat_shift_imm<q>")]
6604 )
6605
6606 (define_expand "aarch64_<sra_op>rshr_n<mode>"
6607   [(match_operand:VSDQ_I_DI 0 "register_operand")
6608    (SHIFTRT:VSDQ_I_DI
6609      (match_operand:VSDQ_I_DI 1 "register_operand")
6610      (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))]
6611   "TARGET_SIMD"
6612   {
6613     /* Use this expander to create the rounding constant vector, which is
6614        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6615        RTL is generated when handling the DImode expanders.  */
6616     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6617     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6618     rtx shft = gen_int_mode (INTVAL (operands[2]), DImode);
6619     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6620     if (VECTOR_MODE_P (<MODE>mode))
6621       {
6622         shft = gen_const_vec_duplicate (<MODE>mode, shft);
6623         rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
6624       }
6625
6626     emit_insn (gen_aarch64_<sra_op>rshr_n<mode>_insn (operands[0], operands[1],
6627                                                       shft, rnd));
6628     DONE;
6629   }
6630 )
6631
6632 ;; v(r)sra_n
6633
6634 (define_insn "aarch64_<sur>sra_ndi"
6635   [(set (match_operand:DI 0 "register_operand" "=w")
6636        (unspec:DI [(match_operand:DI 1 "register_operand" "0")
6637                       (match_operand:DI 2 "register_operand" "w")
6638                        (match_operand:SI 3
6639                         "aarch64_simd_shift_imm_offset_di" "i")]
6640                       VSRA))]
6641   "TARGET_SIMD"
6642   "<sur>sra\\t%d0, %d2, %3"
6643   [(set_attr "type" "neon_shift_acc")]
6644 )
6645
6646 ;; vs<lr>i_n
6647
6648 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
6649   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
6650         (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
6651                        (match_operand:VSDQ_I_DI 2 "register_operand" "w")
6652                        (match_operand:SI 3
6653                          "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
6654                       VSLRI))]
6655   "TARGET_SIMD"
6656   "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
6657   [(set_attr "type" "neon_shift_imm<q>")]
6658 )
6659
6660 ;; vqshl(u)
6661
6662 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
6663   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
6664         (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
6665                        (match_operand:SI 2
6666                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
6667                       VQSHL_N))]
6668   "TARGET_SIMD"
6669   "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
6670   [(set_attr "type" "neon_sat_shift_imm<q>")]
6671 )
6672
6673
6674 ;; vq(r)shr(u)n_n
6675
6676 (define_insn "aarch64_<shrn_op>shrn_n<mode>"
6677   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6678         (SAT_TRUNC:<VNARROWQ>
6679           (<TRUNC_SHIFT>:SD_HSDI
6680             (match_operand:SD_HSDI 1 "register_operand" "w")
6681             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6682   "TARGET_SIMD"
6683   "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6684   [(set_attr "type" "neon_shift_imm_narrow_q")]
6685 )
6686
6687 (define_insn "*aarch64_<shrn_op><shrn_s>shrn_n<mode>_insn<vczle><vczbe>"
6688   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6689         (ALL_TRUNC:<VNARROWQ>
6690           (SHIFTRT:VQN
6691             (match_operand:VQN 1 "register_operand" "w")
6692             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6693   "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6694   "<shrn_op>shrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6695   [(set_attr "type" "neon_shift_imm_narrow_q")]
6696 )
6697
6698 (define_expand "aarch64_<shrn_op>shrn_n<mode>"
6699   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6700         (ALL_TRUNC:<VNARROWQ>
6701           (<TRUNC_SHIFT>:VQN
6702             (match_operand:VQN 1 "register_operand")
6703             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6704   "TARGET_SIMD"
6705   {
6706     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6707                                                  INTVAL (operands[2]));
6708   }
6709 )
6710
6711 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn<vczle><vczbe>"
6712   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6713         (ALL_TRUNC:<VNARROWQ>
6714           (<TRUNC_SHIFT>:<V2XWIDE>
6715             (plus:<V2XWIDE>
6716               (<TRUNCEXTEND>:<V2XWIDE>
6717                 (match_operand:VQN 1 "register_operand" "w"))
6718               (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6719             (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))))]
6720   "TARGET_SIMD
6721    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6722   "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6723   [(set_attr "type" "neon_shift_imm_narrow_q")]
6724 )
6725
6726 (define_insn "*aarch64_<shrn_op>rshrn_n<mode>_insn"
6727   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6728         (SAT_TRUNC:<VNARROWQ>
6729           (<TRUNC_SHIFT>:<DWI>
6730             (plus:<DWI>
6731               (<TRUNCEXTEND>:<DWI>
6732                 (match_operand:SD_HSDI 1 "register_operand" "w"))
6733               (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6734             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6735   "TARGET_SIMD
6736    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6737   "<shrn_op>rshrn\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6738   [(set_attr "type" "neon_shift_imm_narrow_q")]
6739 )
6740
6741 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6742   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6743         (SAT_TRUNC:<VNARROWQ>
6744           (<TRUNC_SHIFT>:<V2XWIDE>
6745             (plus:<V2XWIDE>
6746               (<TRUNCEXTEND>:<V2XWIDE>
6747                 (match_operand:SD_HSDI 1 "register_operand"))
6748               (match_dup 3))
6749             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6750   "TARGET_SIMD"
6751   {
6752     /* Use this expander to create the rounding constant vector, which is
6753        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6754        RTL is generated when handling the DImode expanders.  */
6755     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6756     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6757     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6758   }
6759 )
6760
6761 (define_expand "aarch64_<shrn_op>rshrn_n<mode>"
6762   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6763         (ALL_TRUNC:<VNARROWQ>
6764           (<TRUNC_SHIFT>:<V2XWIDE>
6765             (plus:<V2XWIDE>
6766               (<TRUNCEXTEND>:<V2XWIDE>
6767                 (match_operand:VQN 1 "register_operand"))
6768               (match_dup 3))
6769             (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))))]
6770   "TARGET_SIMD"
6771   {
6772     if (<CODE> == TRUNCATE
6773         && INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
6774       {
6775         rtx tmp0 = aarch64_gen_shareable_zero (<MODE>mode);
6776         emit_insn (gen_aarch64_raddhn<mode> (operands[0], operands[1], tmp0));
6777         DONE;
6778       }
6779     /* Use this expander to create the rounding constant vector, which is
6780        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
6781        RTL is generated when handling the DImode expanders.  */
6782     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6783     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6784     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6785     operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6786     operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6787   }
6788 )
6789
6790 (define_insn "*aarch64_sqshrun_n<mode>_insn<vczle><vczbe>"
6791   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6792         (truncate:<VNARROWQ>
6793           (smin:VQN
6794             (smax:VQN
6795               (ashiftrt:VQN
6796                 (match_operand:VQN 1 "register_operand" "w")
6797                 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6798               (match_operand:VQN 3 "aarch64_simd_imm_zero"))
6799             (match_operand:VQN 4 "aarch64_simd_umax_half_mode"))))]
6800   "TARGET_SIMD"
6801   "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6802   [(set_attr "type" "neon_shift_imm_narrow_q")]
6803 )
6804
6805 (define_insn "aarch64_sqshrun_n<mode>_insn"
6806   [(set (match_operand:SD_HSDI 0 "register_operand" "=w")
6807         (smin:SD_HSDI
6808           (smax:SD_HSDI
6809             (ashiftrt:SD_HSDI
6810               (match_operand:SD_HSDI 1 "register_operand" "w")
6811               (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6812             (const_int 0))
6813           (const_int <half_mask>)))]
6814   "TARGET_SIMD"
6815   "sqshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6816   [(set_attr "type" "neon_shift_imm_narrow_q")]
6817 )
6818
6819 (define_expand "aarch64_sqshrun_n<mode>"
6820   [(match_operand:<VNARROWQ> 0 "register_operand")
6821    (match_operand:SD_HSDI 1 "register_operand")
6822    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6823   "TARGET_SIMD"
6824   {
6825     rtx dst = gen_reg_rtx (<MODE>mode);
6826     emit_insn (gen_aarch64_sqshrun_n<mode>_insn (dst, operands[1],
6827                                                  operands[2]));
6828     emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6829     DONE;
6830   }
6831 )
6832
6833 (define_expand "aarch64_sqshrun_n<mode>"
6834   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6835         (truncate:<VNARROWQ>
6836           (smin:VQN
6837             (smax:VQN
6838               (ashiftrt:VQN
6839                 (match_operand:VQN 1 "register_operand")
6840                 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6841               (match_dup 3))
6842             (match_dup 4))))]
6843   "TARGET_SIMD"
6844   {
6845     operands[2] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6846                                                  INTVAL (operands[2]));
6847     operands[3] = CONST0_RTX (<MODE>mode);
6848     operands[4]
6849       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6850                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
6851   }
6852 )
6853
6854 (define_insn "*aarch64_sqrshrun_n<mode>_insn<vczle><vczbe>"
6855   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
6856         (truncate:<VNARROWQ>
6857           (smin:<V2XWIDE>
6858             (smax:<V2XWIDE>
6859               (ashiftrt:<V2XWIDE>
6860                 (plus:<V2XWIDE>
6861                   (sign_extend:<V2XWIDE>
6862                     (match_operand:VQN 1 "register_operand" "w"))
6863                   (match_operand:<V2XWIDE> 3 "aarch64_int_rnd_operand"))
6864                 (match_operand:VQN 2 "aarch64_simd_shift_imm_vec_<vn_mode>"))
6865               (match_operand:<V2XWIDE> 4 "aarch64_simd_imm_zero"))
6866             (match_operand:<V2XWIDE> 5 "aarch64_simd_umax_quarter_mode"))))]
6867   "TARGET_SIMD
6868    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6869   "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6870   [(set_attr "type" "neon_shift_imm_narrow_q")]
6871 )
6872
6873 (define_insn "aarch64_sqrshrun_n<mode>_insn"
6874   [(set (match_operand:<DWI> 0 "register_operand" "=w")
6875         (smin:<DWI>
6876           (smax:<DWI>
6877             (ashiftrt:<DWI>
6878               (plus:<DWI>
6879                 (sign_extend:<DWI>
6880                   (match_operand:SD_HSDI 1 "register_operand" "w"))
6881                 (match_operand:<DWI> 3 "aarch64_int_rnd_operand"))
6882               (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6883             (const_int 0))
6884           (const_int <half_mask>)))]
6885   "TARGET_SIMD
6886    && aarch64_const_vec_rnd_cst_p (operands[3], operands[2])"
6887   "sqrshrun\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
6888   [(set_attr "type" "neon_shift_imm_narrow_q")]
6889 )
6890
6891 (define_expand "aarch64_sqrshrun_n<mode>"
6892   [(match_operand:<VNARROWQ> 0 "register_operand")
6893    (match_operand:SD_HSDI 1 "register_operand")
6894    (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>")]
6895   "TARGET_SIMD"
6896   {
6897     int prec = GET_MODE_UNIT_PRECISION (<DWI>mode);
6898     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6899     rtx rnd = immed_wide_int_const (rnd_wi, <DWI>mode);
6900     rtx dst = gen_reg_rtx (<DWI>mode);
6901     emit_insn (gen_aarch64_sqrshrun_n<mode>_insn (dst, operands[1], operands[2], rnd));
6902     emit_move_insn (operands[0], gen_lowpart (<VNARROWQ>mode, dst));
6903     DONE;
6904   }
6905 )
6906
6907 (define_expand "aarch64_sqrshrun_n<mode>"
6908   [(set (match_operand:<VNARROWQ> 0 "register_operand")
6909         (truncate:<VNARROWQ>
6910           (smin:<V2XWIDE>
6911             (smax:<V2XWIDE>
6912               (ashiftrt:<V2XWIDE>
6913                 (plus:<V2XWIDE>
6914                   (sign_extend:<V2XWIDE>
6915                     (match_operand:VQN 1 "register_operand"))
6916                   (match_dup 3))
6917                 (match_operand:SI 2 "aarch64_simd_shift_imm_offset_<ve_mode>"))
6918               (match_dup 4))
6919             (match_dup 5))))]
6920   "TARGET_SIMD"
6921   {
6922     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
6923     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[2]) - 1, prec);
6924     operands[3] = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
6925     operands[3] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[3]);
6926     operands[2] = gen_const_vec_duplicate (<MODE>mode, operands[2]);
6927     operands[4] = CONST0_RTX (<V2XWIDE>mode);
6928     operands[5]
6929       = gen_int_mode (GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)), DImode);
6930     operands[5] = gen_const_vec_duplicate (<V2XWIDE>mode, operands[5]);
6931   }
6932 )
6933
6934 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le"
6935   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6936         (vec_concat:<VNARROWQ2>
6937           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6938           (ALL_TRUNC:<VNARROWQ>
6939             (SHIFTRT:VQN
6940               (match_operand:VQN 2 "register_operand" "w")
6941               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6942   "TARGET_SIMD && !BYTES_BIG_ENDIAN
6943    && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6944   "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6945   [(set_attr "type" "neon_shift_imm_narrow_q")]
6946 )
6947
6948 (define_insn "aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be"
6949   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6950         (vec_concat:<VNARROWQ2>
6951           (ALL_TRUNC:<VNARROWQ>
6952             (SHIFTRT:VQN
6953               (match_operand:VQN 2 "register_operand" "w")
6954               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
6955           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
6956   "TARGET_SIMD && BYTES_BIG_ENDIAN
6957    && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6958   "<shrn_op>shrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6959   [(set_attr "type" "neon_shift_imm_narrow_q")]
6960 )
6961
6962 (define_expand "aarch64_<shrn_op><sra_op>shrn2_n<mode>"
6963   [(match_operand:<VNARROWQ2> 0 "register_operand")
6964    (match_operand:<VNARROWQ> 1 "register_operand")
6965    (ALL_TRUNC:<VNARROWQ>
6966      (SHIFTRT:VQN (match_operand:VQN 2 "register_operand")))
6967    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
6968   "TARGET_SIMD && AARCH64_VALID_SHRN_OP (<ALL_TRUNC:CODE>, <SHIFTRT:CODE>)"
6969   {
6970     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
6971                                                  INTVAL (operands[3]));
6972
6973     if (BYTES_BIG_ENDIAN)
6974       emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_be (
6975                 operands[0], operands[1], operands[2], operands[3]));
6976     else
6977       emit_insn (gen_aarch64_<shrn_op><sra_op>shrn2_n<mode>_insn_le (
6978                 operands[0], operands[1], operands[2], operands[3]));
6979     DONE;
6980   }
6981 )
6982
6983 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_le"
6984   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
6985         (vec_concat:<VNARROWQ2>
6986           (match_operand:<VNARROWQ> 1 "register_operand" "0")
6987           (ALL_TRUNC:<VNARROWQ>
6988             (<TRUNC_SHIFT>:<V2XWIDE>
6989               (plus:<V2XWIDE>
6990                 (<TRUNCEXTEND>:<V2XWIDE>
6991                   (match_operand:VQN 2 "register_operand" "w"))
6992                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
6993               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))))]
6994   "TARGET_SIMD && !BYTES_BIG_ENDIAN
6995    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
6996   "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
6997   [(set_attr "type" "neon_shift_imm_narrow_q")]
6998 )
6999
7000 (define_insn "aarch64_<shrn_op>rshrn2_n<mode>_insn_be"
7001   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7002         (vec_concat:<VNARROWQ2>
7003           (ALL_TRUNC:<VNARROWQ>
7004             (<TRUNC_SHIFT>:<V2XWIDE>
7005               (plus:<V2XWIDE>
7006                 (<TRUNCEXTEND>:<V2XWIDE>
7007                   (match_operand:VQN 2 "register_operand" "w"))
7008                 (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7009               (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>")))
7010           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7011   "TARGET_SIMD && BYTES_BIG_ENDIAN
7012    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7013   "<shrn_op>rshrn2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7014   [(set_attr "type" "neon_shift_imm_narrow_q")]
7015 )
7016
7017 (define_expand "aarch64_<shrn_op>rshrn2_n<mode>"
7018   [(match_operand:<VNARROWQ2> 0 "register_operand")
7019    (match_operand:<VNARROWQ> 1 "register_operand")
7020    (ALL_TRUNC:<VNARROWQ> (match_operand:VQN 2 "register_operand"))
7021    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7022   "TARGET_SIMD"
7023   {
7024     if (<CODE> == TRUNCATE
7025         && INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<VNARROWQ>mode))
7026       {
7027         rtx tmp = aarch64_gen_shareable_zero (<MODE>mode);
7028         emit_insn (gen_aarch64_raddhn2<mode> (operands[0], operands[1],
7029                                               operands[2], tmp));
7030         DONE;
7031       }
7032     /* Use this expander to create the rounding constant vector, which is
7033        1 << (shift - 1).  Use wide_int here to ensure that the right TImode
7034        RTL is generated when handling the DImode expanders.  */
7035     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7036     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7037     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7038     rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7039     operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7040     if (BYTES_BIG_ENDIAN)
7041       emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_be (operands[0],
7042                                                               operands[1],
7043                                                               operands[2],
7044                                                               operands[3],
7045                                                               rnd));
7046     else
7047       emit_insn (gen_aarch64_<shrn_op>rshrn2_n<mode>_insn_le (operands[0],
7048                                                               operands[1],
7049                                                               operands[2],
7050                                                               operands[3],
7051                                                               rnd));
7052     DONE;
7053   }
7054 )
7055
7056 (define_insn "aarch64_sqshrun2_n<mode>_insn_le"
7057   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7058         (vec_concat:<VNARROWQ2>
7059           (match_operand:<VNARROWQ> 1 "register_operand" "0")
7060           (truncate:<VNARROWQ>
7061             (smin:VQN
7062               (smax:VQN
7063                 (ashiftrt:VQN
7064                   (match_operand:VQN 2 "register_operand" "w")
7065                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7066                 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
7067               (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))))]
7068   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
7069   "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7070   [(set_attr "type" "neon_shift_imm_narrow_q")]
7071 )
7072
7073 (define_insn "aarch64_sqshrun2_n<mode>_insn_be"
7074   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7075         (vec_concat:<VNARROWQ2>
7076           (truncate:<VNARROWQ>
7077             (smin:VQN
7078               (smax:VQN
7079                 (ashiftrt:VQN
7080                   (match_operand:VQN 2 "register_operand" "w")
7081                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7082                 (match_operand:VQN 4 "aarch64_simd_imm_zero"))
7083               (match_operand:VQN 5 "aarch64_simd_umax_half_mode")))
7084           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7085   "TARGET_SIMD && BYTES_BIG_ENDIAN"
7086   "sqshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7087   [(set_attr "type" "neon_shift_imm_narrow_q")]
7088 )
7089
7090 (define_expand "aarch64_sqshrun2_n<mode>"
7091   [(match_operand:<VNARROWQ2> 0 "register_operand")
7092    (match_operand:<VNARROWQ> 1 "register_operand")
7093    (match_operand:VQN 2 "register_operand")
7094    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7095   "TARGET_SIMD"
7096   {
7097     operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
7098                                                  INTVAL (operands[3]));
7099     rtx zeros = CONST0_RTX (<MODE>mode);
7100     rtx half_umax
7101       = aarch64_simd_gen_const_vector_dup (<MODE>mode,
7102                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7103     if (BYTES_BIG_ENDIAN)
7104       emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_be (operands[0],
7105                                 operands[1], operands[2], operands[3],
7106                                 zeros, half_umax));
7107     else
7108       emit_insn (gen_aarch64_sqshrun2_n<mode>_insn_le (operands[0],
7109                                 operands[1], operands[2], operands[3],
7110                                 zeros, half_umax));
7111     DONE;
7112   }
7113 )
7114
7115 (define_insn "aarch64_sqrshrun2_n<mode>_insn_le"
7116   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7117         (vec_concat:<VNARROWQ2>
7118           (match_operand:<VNARROWQ> 1 "register_operand" "0")
7119           (truncate:<VNARROWQ>
7120             (smin:<V2XWIDE>
7121               (smax:<V2XWIDE>
7122                 (ashiftrt:<V2XWIDE>
7123                   (plus:<V2XWIDE>
7124                     (sign_extend:<V2XWIDE>
7125                       (match_operand:VQN 2 "register_operand" "w"))
7126                     (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7127                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7128                 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
7129               (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))))]
7130   "TARGET_SIMD && !BYTES_BIG_ENDIAN
7131    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7132   "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7133   [(set_attr "type" "neon_shift_imm_narrow_q")]
7134 )
7135
7136 (define_insn "aarch64_sqrshrun2_n<mode>_insn_be"
7137   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
7138         (vec_concat:<VNARROWQ2>
7139           (truncate:<VNARROWQ>
7140             (smin:<V2XWIDE>
7141               (smax:<V2XWIDE>
7142                 (ashiftrt:<V2XWIDE>
7143                   (plus:<V2XWIDE>
7144                     (sign_extend:<V2XWIDE>
7145                       (match_operand:VQN 2 "register_operand" "w"))
7146                     (match_operand:<V2XWIDE> 4 "aarch64_int_rnd_operand"))
7147                   (match_operand:VQN 3 "aarch64_simd_shift_imm_vec_<vn_mode>"))
7148                 (match_operand:<V2XWIDE> 5 "aarch64_simd_imm_zero"))
7149               (match_operand:<V2XWIDE> 6 "aarch64_simd_umax_quarter_mode")))
7150           (match_operand:<VNARROWQ> 1 "register_operand" "0")))]
7151   "TARGET_SIMD && BYTES_BIG_ENDIAN
7152    && aarch64_const_vec_rnd_cst_p (operands[4], operands[3])"
7153   "sqrshrun2\t%<vn2>0.<V2ntype>, %<v>2.<Vtype>, %3"
7154   [(set_attr "type" "neon_shift_imm_narrow_q")]
7155 )
7156
7157 (define_expand "aarch64_sqrshrun2_n<mode>"
7158   [(match_operand:<VNARROWQ2> 0 "register_operand")
7159    (match_operand:<VNARROWQ> 1 "register_operand")
7160    (match_operand:VQN 2 "register_operand")
7161    (match_operand:SI 3 "aarch64_simd_shift_imm_offset_<vn_mode>")]
7162   "TARGET_SIMD"
7163   {
7164     int prec = GET_MODE_UNIT_PRECISION (<V2XWIDE>mode);
7165     wide_int rnd_wi = wi::set_bit_in_zero (INTVAL (operands[3]) - 1, prec);
7166     rtx rnd = immed_wide_int_const (rnd_wi, GET_MODE_INNER (<V2XWIDE>mode));
7167     rnd = gen_const_vec_duplicate (<V2XWIDE>mode, rnd);
7168     rtx zero = CONST0_RTX (<V2XWIDE>mode);
7169     rtx half_umax
7170       = aarch64_simd_gen_const_vector_dup (<V2XWIDE>mode,
7171                         GET_MODE_MASK (GET_MODE_INNER (<VNARROWQ>mode)));
7172     operands[3] = gen_const_vec_duplicate (<MODE>mode, operands[3]);
7173     if (BYTES_BIG_ENDIAN)
7174       emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_be (operands[0],
7175                                 operands[1], operands[2], operands[3], rnd,
7176                                 zero, half_umax));
7177     else
7178       emit_insn (gen_aarch64_sqrshrun2_n<mode>_insn_le (operands[0],
7179                                 operands[1], operands[2], operands[3], rnd,
7180                                 zero, half_umax));
7181     DONE;
7182   }
7183 )
7184
7185 ;; cm(eq|ge|gt|lt|le)
7186 ;; Note, we have constraints for Dz and Z as different expanders
7187 ;; have different ideas of what should be passed to this pattern.
7188
7189 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7190   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7191         (neg:<V_INT_EQUIV>
7192           (COMPARISONS:<V_INT_EQUIV>
7193             (match_operand:VDQ_I 1 "register_operand")
7194             (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero")
7195           )))]
7196   "TARGET_SIMD"
7197   {@ [ cons: =0 , 1 , 2   ; attrs: type           ]
7198      [ w        , w , w   ; neon_compare<q>       ] cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7199      [ w        , w , ZDz ; neon_compare_zero<q>  ] cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0
7200   }
7201 )
7202
7203 (define_insn_and_split "aarch64_cm<optab>di"
7204   [(set (match_operand:DI 0 "register_operand" "=w,w,r")
7205         (neg:DI
7206           (COMPARISONS:DI
7207             (match_operand:DI 1 "register_operand" "w,w,r")
7208             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
7209           )))
7210      (clobber (reg:CC CC_REGNUM))]
7211   "TARGET_SIMD"
7212   "#"
7213   "&& reload_completed"
7214   [(set (match_operand:DI 0 "register_operand")
7215         (neg:DI
7216           (COMPARISONS:DI
7217             (match_operand:DI 1 "register_operand")
7218             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7219           )))]
7220   {
7221     /* If we are in the general purpose register file,
7222        we split to a sequence of comparison and store.  */
7223     if (GP_REGNUM_P (REGNO (operands[0]))
7224         && GP_REGNUM_P (REGNO (operands[1])))
7225       {
7226         machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
7227         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7228         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7229         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7230         DONE;
7231       }
7232     /* Otherwise, we expand to a similar pattern which does not
7233        clobber CC_REGNUM.  */
7234   }
7235   [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
7236 )
7237
7238 (define_insn "*aarch64_cm<optab>di"
7239   [(set (match_operand:DI 0 "register_operand")
7240         (neg:DI
7241           (COMPARISONS:DI
7242             (match_operand:DI 1 "register_operand")
7243             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7244           )))]
7245   "TARGET_SIMD && reload_completed"
7246   {@ [ cons: =0 , 1 , 2   ; attrs: type        ]
7247      [ w        , w , w   ; neon_compare       ] cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
7248      [ w        , w , ZDz ; neon_compare_zero  ] cm<optab>\t%d0, %d1, #0
7249   }
7250 )
7251
7252 ;; cm(hs|hi)
7253
7254 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7255   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7256         (neg:<V_INT_EQUIV>
7257           (UCOMPARISONS:<V_INT_EQUIV>
7258             (match_operand:VDQ_I 1 "register_operand" "w")
7259             (match_operand:VDQ_I 2 "register_operand" "w")
7260           )))]
7261   "TARGET_SIMD"
7262   "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7263   [(set_attr "type" "neon_compare<q>")]
7264 )
7265
7266 (define_insn_and_split "aarch64_cm<optab>di"
7267   [(set (match_operand:DI 0 "register_operand" "=w,r")
7268         (neg:DI
7269           (UCOMPARISONS:DI
7270             (match_operand:DI 1 "register_operand" "w,r")
7271             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
7272           )))
7273     (clobber (reg:CC CC_REGNUM))]
7274   "TARGET_SIMD"
7275   "#"
7276   "&& reload_completed"
7277   [(set (match_operand:DI 0 "register_operand")
7278         (neg:DI
7279           (UCOMPARISONS:DI
7280             (match_operand:DI 1 "register_operand")
7281             (match_operand:DI 2 "aarch64_simd_reg_or_zero")
7282           )))]
7283   {
7284     /* If we are in the general purpose register file,
7285        we split to a sequence of comparison and store.  */
7286     if (GP_REGNUM_P (REGNO (operands[0]))
7287         && GP_REGNUM_P (REGNO (operands[1])))
7288       {
7289         machine_mode mode = CCmode;
7290         rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
7291         rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
7292         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7293         DONE;
7294       }
7295     /* Otherwise, we expand to a similar pattern which does not
7296        clobber CC_REGNUM.  */
7297   }
7298   [(set_attr "type" "neon_compare,multiple")]
7299 )
7300
7301 (define_insn "*aarch64_cm<optab>di"
7302   [(set (match_operand:DI 0 "register_operand" "=w")
7303         (neg:DI
7304           (UCOMPARISONS:DI
7305             (match_operand:DI 1 "register_operand" "w")
7306             (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
7307           )))]
7308   "TARGET_SIMD && reload_completed"
7309   "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
7310   [(set_attr "type" "neon_compare")]
7311 )
7312
7313 ;; cmtst
7314
7315 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
7316 ;; we don't have any insns using ne, and aarch64_vcond outputs
7317 ;; not (neg (eq (and x y) 0))
7318 ;; which is rewritten by simplify_rtx as
7319 ;; plus (eq (and x y) 0) -1.
7320
7321 (define_insn "aarch64_cmtst<mode><vczle><vczbe>"
7322   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7323         (plus:<V_INT_EQUIV>
7324           (eq:<V_INT_EQUIV>
7325             (and:VDQ_I
7326               (match_operand:VDQ_I 1 "register_operand" "w")
7327               (match_operand:VDQ_I 2 "register_operand" "w"))
7328             (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
7329           (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
7330   ]
7331   "TARGET_SIMD"
7332   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7333   [(set_attr "type" "neon_tst<q>")]
7334 )
7335
7336 ;; One can also get a cmtsts by having to combine a
7337 ;; not (neq (eq x 0)) in which case you rewrite it to
7338 ;; a comparison against itself
7339
7340 (define_insn "*aarch64_cmtst_same_<mode><vczle><vczbe>"
7341   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7342         (plus:<V_INT_EQUIV>
7343           (eq:<V_INT_EQUIV>
7344             (match_operand:VDQ_I 1 "register_operand" "w")
7345             (match_operand:VDQ_I 2 "aarch64_simd_imm_zero"))
7346           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_imm_minus_one")))
7347   ]
7348   "TARGET_SIMD"
7349   "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>1<Vmtype>"
7350   [(set_attr "type" "neon_tst<q>")]
7351 )
7352
7353 (define_insn_and_split "aarch64_cmtstdi"
7354   [(set (match_operand:DI 0 "register_operand" "=w,r")
7355         (neg:DI
7356           (ne:DI
7357             (and:DI
7358               (match_operand:DI 1 "register_operand" "w,r")
7359               (match_operand:DI 2 "register_operand" "w,r"))
7360             (const_int 0))))
7361     (clobber (reg:CC CC_REGNUM))]
7362   "TARGET_SIMD"
7363   "#"
7364   "&& reload_completed"
7365   [(set (match_operand:DI 0 "register_operand")
7366         (neg:DI
7367           (ne:DI
7368             (and:DI
7369               (match_operand:DI 1 "register_operand")
7370               (match_operand:DI 2 "register_operand"))
7371             (const_int 0))))]
7372   {
7373     /* If we are in the general purpose register file,
7374        we split to a sequence of comparison and store.  */
7375     if (GP_REGNUM_P (REGNO (operands[0]))
7376         && GP_REGNUM_P (REGNO (operands[1])))
7377       {
7378         rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
7379         machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
7380         rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
7381         rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
7382         emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
7383         DONE;
7384       }
7385     /* Otherwise, we expand to a similar pattern which does not
7386        clobber CC_REGNUM.  */
7387   }
7388   [(set_attr "type" "neon_tst,multiple")]
7389 )
7390
7391 (define_insn "*aarch64_cmtstdi<vczle><vczbe>"
7392   [(set (match_operand:DI 0 "register_operand" "=w")
7393         (neg:DI
7394           (ne:DI
7395             (and:DI
7396               (match_operand:DI 1 "register_operand" "w")
7397               (match_operand:DI 2 "register_operand" "w"))
7398             (const_int 0))))]
7399   "TARGET_SIMD"
7400   "cmtst\t%d0, %d1, %d2"
7401   [(set_attr "type" "neon_tst")]
7402 )
7403
7404 ;; fcm(eq|ge|gt|le|lt)
7405
7406 (define_insn "@aarch64_cm<optab><mode><vczle><vczbe>"
7407   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7408         (neg:<V_INT_EQUIV>
7409           (COMPARISONS:<V_INT_EQUIV>
7410             (match_operand:VHSDF_HSDF 1 "register_operand")
7411             (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero")
7412           )))]
7413   "TARGET_SIMD"
7414   {@ [ cons: =0 , 1 , 2    ]
7415      [ w        , w , w    ] fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
7416      [ w        , w , YDz  ] fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0
7417   }
7418   [(set_attr "type" "neon_fp_compare_<stype><q>")]
7419 )
7420
7421 ;; fac(ge|gt)
7422 ;; Note we can also handle what would be fac(le|lt) by
7423 ;; generating fac(ge|gt).
7424
7425 (define_insn "aarch64_fac<optab><mode><vczle><vczbe>"
7426   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
7427         (neg:<V_INT_EQUIV>
7428           (FAC_COMPARISONS:<V_INT_EQUIV>
7429             (abs:VHSDF_HSDF
7430               (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
7431             (abs:VHSDF_HSDF
7432               (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
7433   )))]
7434   "TARGET_SIMD"
7435   "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
7436   [(set_attr "type" "neon_fp_compare_<stype><q>")]
7437 )
7438
7439 ;; addp
7440
7441 ;; ADDP with two registers semantically concatenates them and performs
7442 ;; a pairwise addition on the result.  For 128-bit input modes represent this
7443 ;; as a concatentation of the pairwise addition results of the two input
7444 ;; registers.  This allow us to avoid using intermediate 256-bit modes.
7445 (define_insn "aarch64_addp<mode>_insn"
7446   [(set (match_operand:VQ_I 0 "register_operand" "=w")
7447         (vec_concat:VQ_I
7448           (plus:<VHALF>
7449             (vec_select:<VHALF>
7450               (match_operand:VQ_I 1 "register_operand" "w")
7451               (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))
7452             (vec_select:<VHALF>
7453               (match_dup 1)
7454               (match_operand:VQ_I 4 "vect_par_cnst_even_or_odd_half")))
7455           (plus:<VHALF>
7456             (vec_select:<VHALF>
7457               (match_operand:VQ_I 2 "register_operand" "w")
7458               (match_dup 3))
7459             (vec_select:<VHALF>
7460               (match_dup 2)
7461               (match_dup 4)))))]
7462   "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7463   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7464   [(set_attr "type" "neon_reduc_add<q>")]
7465 )
7466
7467 ;; For 64-bit input modes an ADDP is represented as a concatentation
7468 ;; of the input registers into an 128-bit register which is then fed
7469 ;; into a pairwise add.  That way we avoid having to create intermediate
7470 ;; 32-bit vector modes.
7471 (define_insn "aarch64_addp<mode><vczle><vczbe>_insn"
7472   [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
7473         (plus:VD_BHSI
7474           (vec_select:VD_BHSI
7475             (vec_concat:<VDBL>
7476               (match_operand:VD_BHSI 1 "register_operand" "w")
7477               (match_operand:VD_BHSI 2 "register_operand" "w"))
7478             (match_operand:<VDBL> 3 "vect_par_cnst_even_or_odd_half"))
7479           (vec_select:VD_BHSI
7480             (vec_concat:<VDBL>
7481               (match_dup 1)
7482               (match_dup 2))
7483             (match_operand:<VDBL> 4 "vect_par_cnst_even_or_odd_half"))))]
7484   "TARGET_SIMD && !rtx_equal_p (operands[3], operands[4])"
7485   "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
7486   [(set_attr "type" "neon_reduc_add<q>")]
7487 )
7488
7489 ;; A common usecase of 64-bit ADDP is to have both operands come from the same
7490 ;; 128-bit vector and produce the pairwise addition results in the lower half.
7491 ;; Split into the 128-bit ADDP form and extract the low half.
7492 (define_insn_and_split "*aarch64_addp_same_reg<mode>"
7493   [(set (match_operand:<VHALF> 0 "register_operand" "=w")
7494         (plus:<VHALF>
7495           (vec_select:<VHALF>
7496             (match_operand:VQ_I 1 "register_operand" "w")
7497             (match_operand:VQ_I 2 "vect_par_cnst_even_or_odd_half"))
7498           (vec_select:<VHALF>
7499             (match_dup 1)
7500             (match_operand:VQ_I 3 "vect_par_cnst_even_or_odd_half"))))]
7501   "TARGET_SIMD && !rtx_equal_p (operands[2], operands[3])"
7502   "#"
7503   "&& 1"
7504   [(const_int 0)]
7505   {
7506     rtx scratch;
7507     if (can_create_pseudo_p ())
7508       scratch = gen_reg_rtx (<MODE>mode);
7509     else
7510       scratch = lowpart_subreg (<MODE>mode, operands[0], <VHALF>mode);
7511
7512     emit_insn (gen_aarch64_addp<mode>_insn (scratch, operands[1], operands[1],
7513                                             operands[2], operands[3]));
7514     emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, scratch));
7515     DONE;
7516   }
7517 )
7518
7519 (define_expand "aarch64_addp<mode>"
7520   [(match_operand:VDQ_I 0 "register_operand")
7521    (match_operand:VDQ_I 1 "register_operand")
7522    (match_operand:VDQ_I 2 "register_operand")]
7523   "TARGET_SIMD"
7524   {
7525     int nunits = GET_MODE_NUNITS (<MODE>mode).to_constant ();
7526     if (known_eq (GET_MODE_BITSIZE (<MODE>mode), 128))
7527       nunits /= 2;
7528     rtx par_even = aarch64_gen_stepped_int_parallel (nunits, 0, 2);
7529     rtx par_odd = aarch64_gen_stepped_int_parallel (nunits, 1, 2);
7530     emit_insn (gen_aarch64_addp<mode>_insn (operands[0], operands[1],
7531                                             operands[2], par_even, par_odd));
7532     DONE;
7533   }
7534 )
7535
7536 ;; sqrt
7537
7538 (define_expand "sqrt<mode>2"
7539   [(set (match_operand:VHSDF 0 "register_operand")
7540         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand")))]
7541   "TARGET_SIMD"
7542 {
7543   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
7544     DONE;
7545 })
7546
7547 (define_insn "*sqrt<mode>2<vczle><vczbe>"
7548   [(set (match_operand:VHSDF 0 "register_operand" "=w")
7549         (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
7550   "TARGET_SIMD"
7551   "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
7552   [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
7553 )
7554
7555 ;; Patterns for vector struct loads and stores.
7556
7557 (define_insn "aarch64_simd_ld2<vstruct_elt>"
7558   [(set (match_operand:VSTRUCT_2Q 0 "register_operand" "=w")
7559         (unspec:VSTRUCT_2Q [
7560           (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand" "Utv")]
7561           UNSPEC_LD2))]
7562   "TARGET_SIMD"
7563   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7564   [(set_attr "type" "neon_load2_2reg<q>")]
7565 )
7566
7567 (define_insn "@aarch64_simd_ld2r<vstruct_elt>"
7568   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7569         (unspec:VSTRUCT_2QD [
7570           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7571           UNSPEC_LD2_DUP))]
7572   "TARGET_SIMD"
7573   "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
7574   [(set_attr "type" "neon_load2_all_lanes<q>")]
7575 )
7576
7577 (define_insn "@aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7578   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
7579         (unspec:VSTRUCT_2QD [
7580                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7581                 (match_operand:VSTRUCT_2QD 2 "register_operand" "0")
7582                 (match_operand:SI 3 "immediate_operand" "i")]
7583                 UNSPEC_LD2_LANE))]
7584   "TARGET_SIMD"
7585   {
7586     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7587                                            INTVAL (operands[3]));
7588     return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
7589   }
7590   [(set_attr "type" "neon_load2_one_lane")]
7591 )
7592
7593 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7594   [(set (match_operand:VSTRUCT_2Q 0 "register_operand")
7595         (unspec:VSTRUCT_2Q [
7596                 (match_operand:VSTRUCT_2Q 1 "aarch64_simd_struct_operand")]
7597                 UNSPEC_LD2))]
7598   "TARGET_SIMD"
7599 {
7600   if (BYTES_BIG_ENDIAN)
7601     {
7602       rtx tmp = gen_reg_rtx (<MODE>mode);
7603       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7604                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7605       emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (tmp, operands[1]));
7606       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7607     }
7608   else
7609     emit_insn (gen_aarch64_simd_ld2<vstruct_elt> (operands[0], operands[1]));
7610   DONE;
7611 })
7612
7613 (define_insn "aarch64_simd_st2<vstruct_elt>"
7614   [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand" "=Utv")
7615         (unspec:VSTRUCT_2Q [
7616                 (match_operand:VSTRUCT_2Q 1 "register_operand" "w")]
7617                 UNSPEC_ST2))]
7618   "TARGET_SIMD"
7619   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
7620   [(set_attr "type" "neon_store2_2reg<q>")]
7621 )
7622
7623 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7624 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7625   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7626         (unspec:BLK [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")
7627                      (match_operand:SI 2 "immediate_operand" "i")]
7628                      UNSPEC_ST2_LANE))]
7629   "TARGET_SIMD"
7630   {
7631     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7632                                            INTVAL (operands[2]));
7633     return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
7634   }
7635   [(set_attr "type" "neon_store2_one_lane<q>")]
7636 )
7637
7638 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7639   [(set (match_operand:VSTRUCT_2Q 0 "aarch64_simd_struct_operand")
7640         (unspec:VSTRUCT_2Q [(match_operand:VSTRUCT_2Q 1 "register_operand")]
7641                    UNSPEC_ST2))]
7642   "TARGET_SIMD"
7643 {
7644   if (BYTES_BIG_ENDIAN)
7645     {
7646       rtx tmp = gen_reg_rtx (<MODE>mode);
7647       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7648                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7649       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7650       emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], tmp));
7651     }
7652   else
7653     emit_insn (gen_aarch64_simd_st2<vstruct_elt> (operands[0], operands[1]));
7654   DONE;
7655 })
7656
7657 (define_insn "aarch64_simd_ld3<vstruct_elt>"
7658   [(set (match_operand:VSTRUCT_3Q 0 "register_operand" "=w")
7659         (unspec:VSTRUCT_3Q [
7660           (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand" "Utv")]
7661           UNSPEC_LD3))]
7662   "TARGET_SIMD"
7663   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7664   [(set_attr "type" "neon_load3_3reg<q>")]
7665 )
7666
7667 (define_insn "@aarch64_simd_ld3r<vstruct_elt>"
7668   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7669         (unspec:VSTRUCT_3QD [
7670           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7671           UNSPEC_LD3_DUP))]
7672   "TARGET_SIMD"
7673   "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
7674   [(set_attr "type" "neon_load3_all_lanes<q>")]
7675 )
7676
7677 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7678   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
7679         (unspec:VSTRUCT_3QD [
7680                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7681                 (match_operand:VSTRUCT_3QD 2 "register_operand" "0")
7682                 (match_operand:SI 3 "immediate_operand" "i")]
7683                 UNSPEC_LD3_LANE))]
7684   "TARGET_SIMD"
7685 {
7686     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7687                                            INTVAL (operands[3]));
7688     return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
7689 }
7690   [(set_attr "type" "neon_load3_one_lane")]
7691 )
7692
7693 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7694   [(set (match_operand:VSTRUCT_3Q 0 "register_operand")
7695         (unspec:VSTRUCT_3Q [
7696                 (match_operand:VSTRUCT_3Q 1 "aarch64_simd_struct_operand")]
7697                 UNSPEC_LD3))]
7698   "TARGET_SIMD"
7699 {
7700   if (BYTES_BIG_ENDIAN)
7701     {
7702       rtx tmp = gen_reg_rtx (<MODE>mode);
7703       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7704                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7705       emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (tmp, operands[1]));
7706       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7707     }
7708   else
7709     emit_insn (gen_aarch64_simd_ld3<vstruct_elt> (operands[0], operands[1]));
7710   DONE;
7711 })
7712
7713 (define_insn "aarch64_simd_st3<vstruct_elt>"
7714   [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand" "=Utv")
7715         (unspec:VSTRUCT_3Q [(match_operand:VSTRUCT_3Q 1 "register_operand" "w")]
7716                    UNSPEC_ST3))]
7717   "TARGET_SIMD"
7718   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
7719   [(set_attr "type" "neon_store3_3reg<q>")]
7720 )
7721
7722 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7723 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7724   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7725         (unspec:BLK [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")
7726                      (match_operand:SI 2 "immediate_operand" "i")]
7727                      UNSPEC_ST3_LANE))]
7728   "TARGET_SIMD"
7729   {
7730     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7731                                            INTVAL (operands[2]));
7732     return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
7733   }
7734   [(set_attr "type" "neon_store3_one_lane<q>")]
7735 )
7736
7737 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7738   [(set (match_operand:VSTRUCT_3Q 0 "aarch64_simd_struct_operand")
7739         (unspec:VSTRUCT_3Q [
7740                 (match_operand:VSTRUCT_3Q 1 "register_operand")]
7741                 UNSPEC_ST3))]
7742   "TARGET_SIMD"
7743 {
7744   if (BYTES_BIG_ENDIAN)
7745     {
7746       rtx tmp = gen_reg_rtx (<MODE>mode);
7747       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7748                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7749       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7750       emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], tmp));
7751     }
7752   else
7753     emit_insn (gen_aarch64_simd_st3<vstruct_elt> (operands[0], operands[1]));
7754   DONE;
7755 })
7756
7757 (define_insn "aarch64_simd_ld4<vstruct_elt>"
7758   [(set (match_operand:VSTRUCT_4Q 0 "register_operand" "=w")
7759         (unspec:VSTRUCT_4Q [
7760           (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand" "Utv")]
7761           UNSPEC_LD4))]
7762   "TARGET_SIMD"
7763   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7764   [(set_attr "type" "neon_load4_4reg<q>")]
7765 )
7766
7767 (define_insn "@aarch64_simd_ld4r<vstruct_elt>"
7768   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7769         (unspec:VSTRUCT_4QD [
7770           (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")]
7771           UNSPEC_LD4_DUP))]
7772   "TARGET_SIMD"
7773   "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
7774   [(set_attr "type" "neon_load4_all_lanes<q>")]
7775 )
7776
7777 (define_insn "aarch64_vec_load_lanes<mode>_lane<vstruct_elt>"
7778   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
7779         (unspec:VSTRUCT_4QD [
7780                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
7781                 (match_operand:VSTRUCT_4QD 2 "register_operand" "0")
7782                 (match_operand:SI 3 "immediate_operand" "i")]
7783                 UNSPEC_LD4_LANE))]
7784   "TARGET_SIMD"
7785 {
7786     operands[3] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7787                                            INTVAL (operands[3]));
7788     return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
7789 }
7790   [(set_attr "type" "neon_load4_one_lane")]
7791 )
7792
7793 (define_expand "vec_load_lanes<mode><vstruct_elt>"
7794   [(set (match_operand:VSTRUCT_4Q 0 "register_operand")
7795         (unspec:VSTRUCT_4Q [
7796                 (match_operand:VSTRUCT_4Q 1 "aarch64_simd_struct_operand")]
7797                 UNSPEC_LD4))]
7798   "TARGET_SIMD"
7799 {
7800   if (BYTES_BIG_ENDIAN)
7801     {
7802       rtx tmp = gen_reg_rtx (<MODE>mode);
7803       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7804                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7805       emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (tmp, operands[1]));
7806       emit_insn (gen_aarch64_rev_reglist<mode> (operands[0], tmp, mask));
7807     }
7808   else
7809     emit_insn (gen_aarch64_simd_ld4<vstruct_elt> (operands[0], operands[1]));
7810   DONE;
7811 })
7812
7813 (define_insn "aarch64_simd_st4<vstruct_elt>"
7814   [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand" "=Utv")
7815         (unspec:VSTRUCT_4Q [
7816                 (match_operand:VSTRUCT_4Q 1 "register_operand" "w")]
7817                 UNSPEC_ST4))]
7818   "TARGET_SIMD"
7819   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
7820   [(set_attr "type" "neon_store4_4reg<q>")]
7821 )
7822
7823 ;; RTL uses GCC vector extension indices, so flip only for assembly.
7824 (define_insn "aarch64_vec_store_lanes<mode>_lane<vstruct_elt>"
7825   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
7826         (unspec:BLK [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")
7827                      (match_operand:SI 2 "immediate_operand" "i")]
7828                      UNSPEC_ST4_LANE))]
7829   "TARGET_SIMD"
7830   {
7831     operands[2] = aarch64_endian_lane_rtx (<VSTRUCT_ELT>mode,
7832                                            INTVAL (operands[2]));
7833     return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
7834   }
7835   [(set_attr "type" "neon_store4_one_lane<q>")]
7836 )
7837
7838 (define_expand "vec_store_lanes<mode><vstruct_elt>"
7839   [(set (match_operand:VSTRUCT_4Q 0 "aarch64_simd_struct_operand")
7840         (unspec:VSTRUCT_4Q [(match_operand:VSTRUCT_4Q 1 "register_operand")]
7841                    UNSPEC_ST4))]
7842   "TARGET_SIMD"
7843 {
7844   if (BYTES_BIG_ENDIAN)
7845     {
7846       rtx tmp = gen_reg_rtx (<MODE>mode);
7847       rtx mask = aarch64_reverse_mask (<VSTRUCT_ELT>mode,
7848                         GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>);
7849       emit_insn (gen_aarch64_rev_reglist<mode> (tmp, operands[1], mask));
7850       emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], tmp));
7851     }
7852   else
7853     emit_insn (gen_aarch64_simd_st4<vstruct_elt> (operands[0], operands[1]));
7854   DONE;
7855 })
7856
7857 ;; Patterns for rcpc3 vector lane loads and stores.
7858
7859 (define_insn "aarch64_vec_stl1_lanes<mode>_lane<Vel>"
7860   [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Q")
7861         (unspec:BLK [(match_operand:V12DIF 1 "register_operand" "w")
7862                      (match_operand:SI 2 "immediate_operand" "i")]
7863                      UNSPEC_STL1_LANE))]
7864   "TARGET_RCPC3"
7865   {
7866     operands[2] = aarch64_endian_lane_rtx (<MODE>mode,
7867                                            INTVAL (operands[2]));
7868     return "stl1\\t{%S1.<Vetype>}[%2], %0";
7869   }
7870   [(set_attr "type" "neon_store2_one_lane")]
7871 )
7872
7873 (define_expand "aarch64_vec_stl1_lane<mode>"
7874  [(match_operand:DI 0 "register_operand")
7875   (match_operand:V12DIF 1 "register_operand")
7876   (match_operand:SI 2 "immediate_operand")]
7877   "TARGET_RCPC3"
7878 {
7879   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
7880   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7881
7882   aarch64_simd_lane_bounds (operands[2], 0,
7883                             GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7884   emit_insn (gen_aarch64_vec_stl1_lanes<mode>_lane<Vel> (mem,
7885                                         operands[1], operands[2]));
7886   DONE;
7887 })
7888
7889 (define_insn "aarch64_vec_ldap1_lanes<mode>_lane<Vel>"
7890   [(set (match_operand:V12DIF 0 "register_operand" "=w")
7891         (unspec:V12DIF [
7892                 (match_operand:BLK 1 "aarch64_simd_struct_operand" "Q")
7893                 (match_operand:V12DIF 2 "register_operand" "0")
7894                 (match_operand:SI 3 "immediate_operand" "i")]
7895                 UNSPEC_LDAP1_LANE))]
7896   "TARGET_RCPC3"
7897   {
7898     operands[3] = aarch64_endian_lane_rtx (<MODE>mode,
7899                                            INTVAL (operands[3]));
7900     return "ldap1\\t{%S0.<Vetype>}[%3], %1";
7901   }
7902   [(set_attr "type" "neon_load2_one_lane")]
7903 )
7904
7905 (define_expand "aarch64_vec_ldap1_lane<mode>"
7906   [(match_operand:V12DIF 0 "register_operand")
7907         (match_operand:DI 1 "register_operand")
7908         (match_operand:V12DIF 2 "register_operand")
7909         (match_operand:SI 3 "immediate_operand")]
7910   "TARGET_RCPC3"
7911 {
7912   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
7913   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
7914
7915   aarch64_simd_lane_bounds (operands[3], 0,
7916                             GET_MODE_NUNITS (<MODE>mode).to_constant (), NULL);
7917   emit_insn (gen_aarch64_vec_ldap1_lanes<mode>_lane<Vel> (operands[0],
7918                                 mem, operands[2], operands[3]));
7919   DONE;
7920 })
7921
7922 (define_insn_and_split "aarch64_rev_reglist<mode>"
7923 [(set (match_operand:VSTRUCT_QD 0 "register_operand" "=&w")
7924         (unspec:VSTRUCT_QD
7925                    [(match_operand:VSTRUCT_QD 1 "register_operand" "w")
7926                     (match_operand:V16QI 2 "register_operand" "w")]
7927                    UNSPEC_REV_REGLIST))]
7928   "TARGET_SIMD"
7929   "#"
7930   "&& reload_completed"
7931   [(const_int 0)]
7932 {
7933   int i;
7934   int nregs = GET_MODE_SIZE (<MODE>mode).to_constant () / UNITS_PER_VREG;
7935   for (i = 0; i < nregs; i++)
7936     {
7937       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
7938       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
7939       emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
7940     }
7941   DONE;
7942 }
7943   [(set_attr "type" "neon_tbl1_q")
7944    (set_attr "length" "<insn_count>")]
7945 )
7946
7947 ;; Reload patterns for AdvSIMD register list operands.
7948
7949 (define_expand "mov<mode>"
7950   [(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
7951         (match_operand:VSTRUCT_QD 1 "general_operand"))]
7952   "TARGET_FLOAT"
7953 {
7954   if (known_eq (GET_MODE_SIZE (<MODE>mode), 16)
7955       && operands[1] == CONST0_RTX (<MODE>mode)
7956       && MEM_P (operands[0])
7957       && (can_create_pseudo_p ()
7958           || memory_address_p (TImode, XEXP (operands[0], 0))))
7959     {
7960       operands[0] = adjust_address (operands[0], TImode, 0);
7961       operands[1] = CONST0_RTX (TImode);
7962     }
7963   else if (can_create_pseudo_p ())
7964     {
7965       if (GET_CODE (operands[0]) != REG)
7966         operands[1] = force_reg (<MODE>mode, operands[1]);
7967     }
7968 })
7969
7970 (define_expand "mov<mode>"
7971   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
7972         (match_operand:VSTRUCT 1 "general_operand"))]
7973   "TARGET_FLOAT"
7974 {
7975   if (can_create_pseudo_p ())
7976     {
7977       if (GET_CODE (operands[0]) != REG)
7978         operands[1] = force_reg (<MODE>mode, operands[1]);
7979     }
7980 })
7981
7982 (define_expand "movv8di"
7983   [(set (match_operand:V8DI 0 "nonimmediate_operand")
7984         (match_operand:V8DI 1 "general_operand"))]
7985   ""
7986 {
7987   if (can_create_pseudo_p () && MEM_P (operands[0]))
7988     operands[1] = force_reg (V8DImode, operands[1]);
7989 })
7990
7991 (define_expand "@aarch64_ld1x3<vstruct_elt>"
7992   [(match_operand:VSTRUCT_3QD 0 "register_operand")
7993    (match_operand:DI 1 "register_operand")]
7994   "TARGET_SIMD"
7995 {
7996   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
7997   emit_insn (gen_aarch64_ld1_x3_<vstruct_elt> (operands[0], mem));
7998   DONE;
7999 })
8000
8001 (define_insn "aarch64_ld1_x3_<vstruct_elt>"
8002   [(set (match_operand:VSTRUCT_3QD 0 "register_operand" "=w")
8003         (unspec:VSTRUCT_3QD
8004           [(match_operand:VSTRUCT_3QD 1 "aarch64_simd_struct_operand" "Utv")]
8005           UNSPEC_LD1))]
8006   "TARGET_SIMD"
8007   "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8008   [(set_attr "type" "neon_load1_3reg<q>")]
8009 )
8010
8011 (define_expand "@aarch64_ld1x4<vstruct_elt>"
8012   [(match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
8013    (match_operand:DI 1 "register_operand" "r")]
8014   "TARGET_SIMD"
8015 {
8016   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8017   emit_insn (gen_aarch64_ld1_x4_<vstruct_elt> (operands[0], mem));
8018   DONE;
8019 })
8020
8021 (define_insn "aarch64_ld1_x4_<vstruct_elt>"
8022   [(set (match_operand:VSTRUCT_4QD 0 "register_operand" "=w")
8023         (unspec:VSTRUCT_4QD
8024           [(match_operand:VSTRUCT_4QD 1 "aarch64_simd_struct_operand" "Utv")]
8025         UNSPEC_LD1))]
8026   "TARGET_SIMD"
8027   "ld1\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8028   [(set_attr "type" "neon_load1_4reg<q>")]
8029 )
8030
8031 (define_expand "@aarch64_st1x2<vstruct_elt>"
8032   [(match_operand:DI 0 "register_operand")
8033    (match_operand:VSTRUCT_2QD 1 "register_operand")]
8034   "TARGET_SIMD"
8035 {
8036   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8037   emit_insn (gen_aarch64_st1_x2_<vstruct_elt> (mem, operands[1]));
8038   DONE;
8039 })
8040
8041 (define_insn "aarch64_st1_x2_<vstruct_elt>"
8042   [(set (match_operand:VSTRUCT_2QD 0 "aarch64_simd_struct_operand" "=Utv")
8043         (unspec:VSTRUCT_2QD
8044                 [(match_operand:VSTRUCT_2QD 1 "register_operand" "w")]
8045                 UNSPEC_ST1))]
8046   "TARGET_SIMD"
8047   "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8048   [(set_attr "type" "neon_store1_2reg<q>")]
8049 )
8050
8051 (define_expand "@aarch64_st1x3<vstruct_elt>"
8052   [(match_operand:DI 0 "register_operand")
8053    (match_operand:VSTRUCT_3QD 1 "register_operand")]
8054   "TARGET_SIMD"
8055 {
8056   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8057   emit_insn (gen_aarch64_st1_x3_<vstruct_elt> (mem, operands[1]));
8058   DONE;
8059 })
8060
8061 (define_insn "aarch64_st1_x3_<vstruct_elt>"
8062   [(set (match_operand:VSTRUCT_3QD 0 "aarch64_simd_struct_operand" "=Utv")
8063         (unspec:VSTRUCT_3QD
8064                 [(match_operand:VSTRUCT_3QD 1 "register_operand" "w")]
8065                 UNSPEC_ST1))]
8066   "TARGET_SIMD"
8067   "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8068   [(set_attr "type" "neon_store1_3reg<q>")]
8069 )
8070
8071 (define_expand "@aarch64_st1x4<vstruct_elt>"
8072   [(match_operand:DI 0 "register_operand" "")
8073    (match_operand:VSTRUCT_4QD 1 "register_operand" "")]
8074   "TARGET_SIMD"
8075 {
8076   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8077   emit_insn (gen_aarch64_st1_x4_<vstruct_elt> (mem, operands[1]));
8078   DONE;
8079 })
8080
8081 (define_insn "aarch64_st1_x4_<vstruct_elt>"
8082   [(set (match_operand:VSTRUCT_4QD 0 "aarch64_simd_struct_operand" "=Utv")
8083         (unspec:VSTRUCT_4QD
8084                 [(match_operand:VSTRUCT_4QD 1 "register_operand" "w")]
8085                 UNSPEC_ST1))]
8086   "TARGET_SIMD"
8087   "st1\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8088   [(set_attr "type" "neon_store1_4reg<q>")]
8089 )
8090
8091 (define_insn "*aarch64_movv8di"
8092   [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
8093         (match_operand:V8DI 1 "general_operand" " r,r,m"))]
8094   "(register_operand (operands[0], V8DImode)
8095     || register_operand (operands[1], V8DImode))"
8096   "#"
8097   [(set_attr "type" "multiple,multiple,multiple")
8098    (set_attr "length" "32,16,16")]
8099 )
8100
8101 (define_insn "aarch64_be_ld1<mode>"
8102   [(set (match_operand:VALLDI_F16 0     "register_operand" "=w")
8103         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
8104                              "aarch64_simd_struct_operand" "Utv")]
8105         UNSPEC_LD1))]
8106   "TARGET_SIMD"
8107   "ld1\\t{%0<Vmtype>}, %1"
8108   [(set_attr "type" "neon_load1_1reg<q>")]
8109 )
8110
8111 (define_insn "aarch64_be_st1<mode>"
8112   [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
8113         (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
8114         UNSPEC_ST1))]
8115   "TARGET_SIMD"
8116   "st1\\t{%1<Vmtype>}, %0"
8117   [(set_attr "type" "neon_store1_1reg<q>")]
8118 )
8119
8120 (define_insn "*aarch64_mov<mode>"
8121   [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
8122         (match_operand:VSTRUCT_2D 1 "general_operand"))]
8123   "TARGET_FLOAT
8124    && (register_operand (operands[0], <MODE>mode)
8125        || register_operand (operands[1], <MODE>mode))"
8126   {@ [ cons: =0 , 1 ; attrs: type , length ]
8127      [ w        , w ; multiple    , 8      ] #
8128      [ m        , w ; neon_stp    , 4      ] stp\t%d1, %R1, %0
8129      [ w        , m ; neon_ldp    , 4      ] ldp\t%d0, %R0, %1
8130   }
8131 )
8132
8133 (define_insn "*aarch64_mov<mode>"
8134   [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
8135         (match_operand:VSTRUCT_2Q 1 "general_operand"))]
8136   "TARGET_FLOAT
8137    && (register_operand (operands[0], <MODE>mode)
8138        || register_operand (operands[1], <MODE>mode))"
8139   {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
8140      [ w        , w ; multiple    , simd , 8      ] #
8141      [ m        , w ; neon_stp_q  , *    , 4      ] stp\t%q1, %R1, %0
8142      [ w        , m ; neon_ldp_q  , *    , 4      ] ldp\t%q0, %R0, %1
8143   }
8144 )
8145
8146 (define_insn "*aarch64_movoi"
8147   [(set (match_operand:OI 0 "nonimmediate_operand")
8148         (match_operand:OI 1 "general_operand"))]
8149   "TARGET_FLOAT
8150    && (register_operand (operands[0], OImode)
8151        || register_operand (operands[1], OImode))"
8152   {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
8153      [ w        , w ; multiple    , simd , 8      ] #
8154      [ m        , w ; neon_stp_q  , *    , 4      ] stp\t%q1, %R1, %0
8155      [ w        , m ; neon_ldp_q  , *    , 4      ] ldp\t%q0, %R0, %1
8156   }
8157 )
8158
8159 (define_insn "*aarch64_mov<mode>"
8160   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
8161         (match_operand:VSTRUCT_3QD 1 "general_operand"      " w,w,o"))]
8162   "TARGET_FLOAT
8163    && (register_operand (operands[0], <MODE>mode)
8164        || register_operand (operands[1], <MODE>mode))"
8165   "#"
8166   [(set_attr "type" "multiple")
8167    (set_attr "arch" "fp<q>,*,*")
8168    (set_attr "length" "12,8,8")]
8169 )
8170
8171 (define_insn "*aarch64_movci"
8172   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
8173         (match_operand:CI 1 "general_operand"      " w,w,o"))]
8174   "TARGET_FLOAT
8175    && (register_operand (operands[0], CImode)
8176        || register_operand (operands[1], CImode))"
8177   "#"
8178   [(set_attr "type" "multiple")
8179    (set_attr "arch" "simd,*,*")
8180    (set_attr "length" "12,8,8")]
8181 )
8182
8183 (define_insn "*aarch64_mov<mode>"
8184   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
8185         (match_operand:VSTRUCT_4QD 1 "general_operand"      " w,w,o"))]
8186   "TARGET_FLOAT
8187    && (register_operand (operands[0], <MODE>mode)
8188        || register_operand (operands[1], <MODE>mode))"
8189   "#"
8190   [(set_attr "type" "multiple")
8191    (set_attr "arch" "fp<q>,*,*")
8192    (set_attr "length" "16,8,8")]
8193 )
8194
8195 (define_insn "*aarch64_movxi"
8196   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
8197         (match_operand:XI 1 "general_operand"      " w,w,o"))]
8198   "TARGET_FLOAT
8199    && (register_operand (operands[0], XImode)
8200        || register_operand (operands[1], XImode))"
8201   "#"
8202   [(set_attr "type" "multiple")
8203    (set_attr "arch" "simd,*,*")
8204    (set_attr "length" "16,8,8")]
8205 )
8206
8207 (define_split
8208   [(set (match_operand:VSTRUCT_2QD 0 "register_operand")
8209         (match_operand:VSTRUCT_2QD 1 "register_operand"))]
8210   "TARGET_FLOAT && reload_completed"
8211   [(const_int 0)]
8212 {
8213   aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
8214   DONE;
8215 })
8216
8217 (define_split
8218   [(set (match_operand:OI 0 "register_operand")
8219         (match_operand:OI 1 "register_operand"))]
8220   "TARGET_FLOAT && reload_completed"
8221   [(const_int 0)]
8222 {
8223   aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
8224   DONE;
8225 })
8226
8227 (define_split
8228   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
8229         (match_operand:VSTRUCT_3QD 1 "general_operand"))]
8230   "TARGET_FLOAT && reload_completed"
8231   [(const_int 0)]
8232 {
8233   if (register_operand (operands[0], <MODE>mode)
8234       && register_operand (operands[1], <MODE>mode))
8235     aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
8236   else
8237     {
8238       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8239       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8240       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8241                                            <MODE>mode, 0),
8242                       simplify_gen_subreg (pair_mode, operands[1],
8243                                            <MODE>mode, 0));
8244       emit_move_insn (gen_lowpart (<VSTRUCT_ELT>mode,
8245                                    simplify_gen_subreg (<VSTRUCT_ELT>mode,
8246                                                         operands[0],
8247                                                         <MODE>mode,
8248                                                         2 * elt_size)),
8249                       gen_lowpart (<VSTRUCT_ELT>mode,
8250                                    simplify_gen_subreg (<VSTRUCT_ELT>mode,
8251                                                         operands[1],
8252                                                         <MODE>mode,
8253                                                         2 * elt_size)));
8254     }
8255   DONE;
8256 })
8257
8258 (define_split
8259   [(set (match_operand:CI 0 "nonimmediate_operand")
8260         (match_operand:CI 1 "general_operand"))]
8261   "TARGET_FLOAT && reload_completed"
8262   [(const_int 0)]
8263 {
8264   if (register_operand (operands[0], CImode)
8265       && register_operand (operands[1], CImode))
8266     aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
8267   else
8268     {
8269       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
8270                       simplify_gen_subreg (OImode, operands[1], CImode, 0));
8271       emit_move_insn (gen_lowpart (V16QImode,
8272                                    simplify_gen_subreg (TImode, operands[0],
8273                                                         CImode, 32)),
8274                       gen_lowpart (V16QImode,
8275                                    simplify_gen_subreg (TImode, operands[1],
8276                                                         CImode, 32)));
8277     }
8278   DONE;
8279 })
8280
8281 (define_split
8282   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
8283         (match_operand:VSTRUCT_4QD 1 "general_operand"))]
8284   "TARGET_FLOAT && reload_completed"
8285   [(const_int 0)]
8286 {
8287   if (register_operand (operands[0], <MODE>mode)
8288       && register_operand (operands[1], <MODE>mode))
8289     aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
8290   else
8291     {
8292       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
8293       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
8294       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8295                                            <MODE>mode, 0),
8296                       simplify_gen_subreg (pair_mode, operands[1],
8297                                            <MODE>mode, 0));
8298       emit_move_insn (simplify_gen_subreg (pair_mode, operands[0],
8299                                            <MODE>mode, 2 * elt_size),
8300                       simplify_gen_subreg (pair_mode, operands[1],
8301                                            <MODE>mode, 2 * elt_size));
8302     }
8303   DONE;
8304 })
8305
8306 (define_split
8307   [(set (match_operand:XI 0 "nonimmediate_operand")
8308         (match_operand:XI 1 "general_operand"))]
8309   "TARGET_FLOAT && reload_completed"
8310   [(const_int 0)]
8311 {
8312   if (register_operand (operands[0], XImode)
8313       && register_operand (operands[1], XImode))
8314     aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
8315   else
8316     {
8317       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
8318                       simplify_gen_subreg (OImode, operands[1], XImode, 0));
8319       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
8320                       simplify_gen_subreg (OImode, operands[1], XImode, 32));
8321     }
8322   DONE;
8323 })
8324
8325 (define_split
8326   [(set (match_operand:V8DI 0 "nonimmediate_operand")
8327         (match_operand:V8DI 1 "general_operand"))]
8328   "reload_completed"
8329   [(const_int 0)]
8330 {
8331   if (register_operand (operands[0], V8DImode)
8332       && register_operand (operands[1], V8DImode))
8333     {
8334       aarch64_simd_emit_reg_reg_move (operands, DImode, 8);
8335       DONE;
8336     }
8337   else if ((register_operand (operands[0], V8DImode)
8338             && memory_operand (operands[1], V8DImode))
8339            || (memory_operand (operands[0], V8DImode)
8340                && register_operand (operands[1], V8DImode)))
8341     {
8342       /* V8DI only guarantees 8-byte alignment, whereas TImode requires 16.  */
8343       auto mode = STRICT_ALIGNMENT ? DImode : TImode;
8344       int increment = GET_MODE_SIZE (mode);
8345       std::pair<rtx, rtx> last_pair = {};
8346       for (int offset = 0; offset < 64; offset += increment)
8347         {
8348           std::pair<rtx, rtx> pair = {
8349             simplify_gen_subreg (mode, operands[0], V8DImode, offset),
8350             simplify_gen_subreg (mode, operands[1], V8DImode, offset)
8351           };
8352           if (register_operand (pair.first, mode)
8353               && reg_overlap_mentioned_p (pair.first, pair.second))
8354             last_pair = pair;
8355           else
8356             emit_move_insn (pair.first, pair.second);
8357         }
8358       if (last_pair.first)
8359         emit_move_insn (last_pair.first, last_pair.second);
8360       DONE;
8361     }
8362   else
8363     FAIL;
8364 })
8365
8366 (define_expand "aarch64_ld<nregs>r<vstruct_elt>"
8367   [(match_operand:VSTRUCT_QD 0 "register_operand")
8368    (match_operand:DI 1 "register_operand")]
8369   "TARGET_SIMD"
8370 {
8371   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8372   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8373
8374   emit_insn (gen_aarch64_simd_ld<nregs>r<vstruct_elt> (operands[0], mem));
8375   DONE;
8376 })
8377
8378 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8379   [(set (match_operand:VSTRUCT_2DNX 0 "register_operand" "=w")
8380         (unspec:VSTRUCT_2DNX [
8381           (match_operand:VSTRUCT_2DNX 1 "aarch64_simd_struct_operand" "Utv")]
8382           UNSPEC_LD2_DREG))]
8383   "TARGET_SIMD"
8384   "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8385   [(set_attr "type" "neon_load2_2reg<q>")]
8386 )
8387
8388 (define_insn "aarch64_ld2<vstruct_elt>_dreg"
8389   [(set (match_operand:VSTRUCT_2DX 0 "register_operand" "=w")
8390         (unspec:VSTRUCT_2DX [
8391           (match_operand:VSTRUCT_2DX 1 "aarch64_simd_struct_operand" "Utv")]
8392           UNSPEC_LD2_DREG))]
8393   "TARGET_SIMD"
8394   "ld1\\t{%S0.1d - %T0.1d}, %1"
8395   [(set_attr "type" "neon_load1_2reg<q>")]
8396 )
8397
8398 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8399   [(set (match_operand:VSTRUCT_3DNX 0 "register_operand" "=w")
8400         (unspec:VSTRUCT_3DNX [
8401           (match_operand:VSTRUCT_3DNX 1 "aarch64_simd_struct_operand" "Utv")]
8402           UNSPEC_LD3_DREG))]
8403   "TARGET_SIMD"
8404   "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
8405   [(set_attr "type" "neon_load3_3reg<q>")]
8406 )
8407
8408 (define_insn "aarch64_ld3<vstruct_elt>_dreg"
8409   [(set (match_operand:VSTRUCT_3DX 0 "register_operand" "=w")
8410         (unspec:VSTRUCT_3DX [
8411           (match_operand:VSTRUCT_3DX 1 "aarch64_simd_struct_operand" "Utv")]
8412           UNSPEC_LD3_DREG))]
8413   "TARGET_SIMD"
8414   "ld1\\t{%S0.1d - %U0.1d}, %1"
8415   [(set_attr "type" "neon_load1_3reg<q>")]
8416 )
8417
8418 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8419   [(set (match_operand:VSTRUCT_4DNX 0 "register_operand" "=w")
8420         (unspec:VSTRUCT_4DNX [
8421           (match_operand:VSTRUCT_4DNX 1 "aarch64_simd_struct_operand" "Utv")]
8422           UNSPEC_LD4_DREG))]
8423   "TARGET_SIMD"
8424   "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
8425   [(set_attr "type" "neon_load4_4reg<q>")]
8426 )
8427
8428 (define_insn "aarch64_ld4<vstruct_elt>_dreg"
8429   [(set (match_operand:VSTRUCT_4DX 0 "register_operand" "=w")
8430         (unspec:VSTRUCT_4DX [
8431           (match_operand:VSTRUCT_4DX 1 "aarch64_simd_struct_operand" "Utv")]
8432           UNSPEC_LD4_DREG))]
8433   "TARGET_SIMD"
8434   "ld1\\t{%S0.1d - %V0.1d}, %1"
8435   [(set_attr "type" "neon_load1_4reg<q>")]
8436 )
8437
8438 (define_expand "@aarch64_ld<nregs><vstruct_elt>"
8439  [(match_operand:VSTRUCT_D 0 "register_operand")
8440   (match_operand:DI 1 "register_operand")]
8441   "TARGET_SIMD"
8442 {
8443   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8444   emit_insn (gen_aarch64_ld<nregs><vstruct_elt>_dreg (operands[0], mem));
8445   DONE;
8446 })
8447
8448 (define_expand "@aarch64_ld1<VALL_F16:mode>"
8449  [(match_operand:VALL_F16 0 "register_operand")
8450   (match_operand:DI 1 "register_operand")]
8451   "TARGET_SIMD"
8452 {
8453   machine_mode mode = <VALL_F16:MODE>mode;
8454   rtx mem = gen_rtx_MEM (mode, operands[1]);
8455
8456   if (BYTES_BIG_ENDIAN)
8457     emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
8458   else
8459     emit_move_insn (operands[0], mem);
8460   DONE;
8461 })
8462
8463 (define_expand "@aarch64_ld<nregs><vstruct_elt>"
8464  [(match_operand:VSTRUCT_Q 0 "register_operand")
8465   (match_operand:DI 1 "register_operand")]
8466   "TARGET_SIMD"
8467 {
8468   rtx mem = gen_rtx_MEM (<MODE>mode, operands[1]);
8469   emit_insn (gen_aarch64_simd_ld<nregs><vstruct_elt> (operands[0], mem));
8470   DONE;
8471 })
8472
8473 (define_expand "@aarch64_ld1x2<vstruct_elt>"
8474  [(match_operand:VSTRUCT_2QD 0 "register_operand")
8475   (match_operand:DI 1 "register_operand")]
8476   "TARGET_SIMD"
8477 {
8478   machine_mode mode = <MODE>mode;
8479   rtx mem = gen_rtx_MEM (mode, operands[1]);
8480
8481   emit_insn (gen_aarch64_simd_ld1<vstruct_elt>_x2 (operands[0], mem));
8482   DONE;
8483 })
8484
8485 (define_expand "@aarch64_ld<nregs>_lane<vstruct_elt>"
8486   [(match_operand:VSTRUCT_QD 0 "register_operand")
8487         (match_operand:DI 1 "register_operand")
8488         (match_operand:VSTRUCT_QD 2 "register_operand")
8489         (match_operand:SI 3 "immediate_operand")]
8490   "TARGET_SIMD"
8491 {
8492   rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
8493   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8494
8495   aarch64_simd_lane_bounds (operands[3], 0,
8496                 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8497   emit_insn (gen_aarch64_vec_load_lanes<mode>_lane<vstruct_elt> (operands[0],
8498                                 mem, operands[2], operands[3]));
8499   DONE;
8500 })
8501
8502 ;; Permuted-store expanders for neon intrinsics.
8503
8504 ;; Permute instructions
8505
8506 ;; vec_perm support
8507
8508 (define_expand "vec_perm<mode>"
8509   [(match_operand:VB 0 "register_operand")
8510    (match_operand:VB 1 "register_operand")
8511    (match_operand:VB 2 "register_operand")
8512    (match_operand:VB 3 "register_operand")]
8513   "TARGET_SIMD"
8514 {
8515   aarch64_expand_vec_perm (operands[0], operands[1],
8516                            operands[2], operands[3], <nunits>);
8517   DONE;
8518 })
8519
8520 (define_insn "aarch64_qtbl1<mode>"
8521   [(set (match_operand:VB 0 "register_operand" "=w")
8522         (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
8523                     (match_operand:VB 2 "register_operand" "w")]
8524                    UNSPEC_TBL))]
8525   "TARGET_SIMD"
8526   "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
8527   [(set_attr "type" "neon_tbl1<q>")]
8528 )
8529
8530 (define_insn "aarch64_qtbx1<mode>"
8531   [(set (match_operand:VB 0 "register_operand" "=w")
8532         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8533                     (match_operand:V16QI 2 "register_operand" "w")
8534                     (match_operand:VB 3 "register_operand" "w")]
8535                    UNSPEC_TBX))]
8536   "TARGET_SIMD"
8537   "tbx\\t%0.<Vtype>, {%2.16b}, %3.<Vtype>"
8538   [(set_attr "type" "neon_tbl1<q>")]
8539 )
8540
8541 ;; Two source registers.
8542
8543 (define_insn "aarch64_qtbl2<mode>"
8544   [(set (match_operand:VB 0 "register_operand" "=w")
8545         (unspec:VB [(match_operand:V2x16QI 1 "register_operand" "w")
8546                       (match_operand:VB 2 "register_operand" "w")]
8547                       UNSPEC_TBL))]
8548   "TARGET_SIMD"
8549   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
8550   [(set_attr "type" "neon_tbl2")]
8551 )
8552
8553 (define_insn "aarch64_qtbx2<mode>"
8554   [(set (match_operand:VB 0 "register_operand" "=w")
8555         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8556                       (match_operand:V2x16QI 2 "register_operand" "w")
8557                       (match_operand:VB 3 "register_operand" "w")]
8558                       UNSPEC_TBX))]
8559   "TARGET_SIMD"
8560   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
8561   [(set_attr "type" "neon_tbl2")]
8562 )
8563
8564 ;; Three source registers.
8565
8566 (define_insn "aarch64_qtbl3<mode>"
8567   [(set (match_operand:VB 0 "register_operand" "=w")
8568         (unspec:VB [(match_operand:V3x16QI 1 "register_operand" "w")
8569                       (match_operand:VB 2 "register_operand" "w")]
8570                       UNSPEC_TBL))]
8571   "TARGET_SIMD"
8572   "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
8573   [(set_attr "type" "neon_tbl3")]
8574 )
8575
8576 (define_insn "aarch64_qtbx3<mode>"
8577   [(set (match_operand:VB 0 "register_operand" "=w")
8578         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8579                       (match_operand:V3x16QI 2 "register_operand" "w")
8580                       (match_operand:VB 3 "register_operand" "w")]
8581                       UNSPEC_TBX))]
8582   "TARGET_SIMD"
8583   "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
8584   [(set_attr "type" "neon_tbl3")]
8585 )
8586
8587 ;; Four source registers.
8588
8589 (define_insn "aarch64_qtbl4<mode>"
8590   [(set (match_operand:VB 0 "register_operand" "=w")
8591         (unspec:VB [(match_operand:V4x16QI 1 "register_operand" "w")
8592                       (match_operand:VB 2 "register_operand" "w")]
8593                       UNSPEC_TBL))]
8594   "TARGET_SIMD"
8595   "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
8596   [(set_attr "type" "neon_tbl4")]
8597 )
8598
8599 (define_insn "aarch64_qtbx4<mode>"
8600   [(set (match_operand:VB 0 "register_operand" "=w")
8601         (unspec:VB [(match_operand:VB 1 "register_operand" "0")
8602                       (match_operand:V4x16QI 2 "register_operand" "w")
8603                       (match_operand:VB 3 "register_operand" "w")]
8604                       UNSPEC_TBX))]
8605   "TARGET_SIMD"
8606   "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
8607   [(set_attr "type" "neon_tbl4")]
8608 )
8609
8610 (define_insn_and_split "aarch64_combinev16qi"
8611   [(set (match_operand:V2x16QI 0 "register_operand" "=w")
8612         (unspec:V2x16QI [(match_operand:V16QI 1 "register_operand" "w")
8613                          (match_operand:V16QI 2 "register_operand" "w")]
8614                         UNSPEC_CONCAT))]
8615   "TARGET_SIMD"
8616   "#"
8617   "&& 1"
8618   [(const_int 0)]
8619 {
8620   aarch64_split_combinev16qi (operands);
8621   DONE;
8622 }
8623 [(set_attr "type" "multiple")]
8624 )
8625
8626 ;; This instruction's pattern is generated directly by
8627 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8628 ;; need corresponding changes there.
8629 (define_insn "@aarch64_<PERMUTE:perm_insn><mode><vczle><vczbe>"
8630   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8631         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8632                           (match_operand:VALL_F16 2 "register_operand" "w")]
8633          PERMUTE))]
8634   "TARGET_SIMD"
8635   "<PERMUTE:perm_insn>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8636   [(set_attr "type" "neon_permute<q>")]
8637 )
8638
8639 ;; ZIP1 ignores the contents of the upper halves of the registers,
8640 ;; so we can describe 128-bit operations in terms of 64-bit inputs.
8641 (define_insn "aarch64_zip1<mode>_low"
8642   [(set (match_operand:VQ 0 "register_operand" "=w")
8643         (unspec:VQ [(match_operand:<VHALF> 1 "register_operand" "w")
8644                     (match_operand:<VHALF> 2 "register_operand" "w")]
8645                    UNSPEC_ZIP1))]
8646   "TARGET_SIMD"
8647   "zip1\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
8648   [(set_attr "type" "neon_permute_q")]
8649 )
8650
8651 ;; This instruction's pattern is generated directly by
8652 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8653 ;; need corresponding changes there.  Note that the immediate (third)
8654 ;; operand is a lane index not a byte index.
8655 (define_insn "@aarch64_ext<mode>"
8656   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8657         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
8658                           (match_operand:VALL_F16 2 "register_operand" "w")
8659                           (match_operand:SI 3 "immediate_operand" "i")]
8660          UNSPEC_EXT))]
8661   "TARGET_SIMD"
8662 {
8663   operands[3] = GEN_INT (INTVAL (operands[3])
8664       * GET_MODE_UNIT_SIZE (<MODE>mode));
8665   return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
8666 }
8667   [(set_attr "type" "neon_ext<q>")]
8668 )
8669
8670 ;; This instruction's pattern is generated directly by
8671 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
8672 ;; need corresponding changes there.
8673 (define_insn "@aarch64_rev<REVERSE:rev_op><mode><vczle><vczbe>"
8674   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8675         (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
8676                     REVERSE))]
8677   "TARGET_SIMD"
8678   "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
8679   [(set_attr "type" "neon_rev<q>")]
8680 )
8681
8682 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8683   [(set (match_operand:VSTRUCT_2DNX 0 "aarch64_simd_struct_operand" "=Utv")
8684         (unspec:VSTRUCT_2DNX [
8685                 (match_operand:VSTRUCT_2DNX 1 "register_operand" "w")]
8686                 UNSPEC_ST2))]
8687   "TARGET_SIMD"
8688   "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
8689   [(set_attr "type" "neon_store2_2reg")]
8690 )
8691
8692 (define_insn "aarch64_st2<vstruct_elt>_dreg"
8693   [(set (match_operand:VSTRUCT_2DX 0 "aarch64_simd_struct_operand" "=Utv")
8694         (unspec:VSTRUCT_2DX [
8695                 (match_operand:VSTRUCT_2DX 1 "register_operand" "w")]
8696                 UNSPEC_ST2))]
8697   "TARGET_SIMD"
8698   "st1\\t{%S1.1d - %T1.1d}, %0"
8699   [(set_attr "type" "neon_store1_2reg")]
8700 )
8701
8702 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8703   [(set (match_operand:VSTRUCT_3DNX 0 "aarch64_simd_struct_operand" "=Utv")
8704         (unspec:VSTRUCT_3DNX [
8705                 (match_operand:VSTRUCT_3DNX 1 "register_operand" "w")]
8706                 UNSPEC_ST3))]
8707   "TARGET_SIMD"
8708   "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
8709   [(set_attr "type" "neon_store3_3reg")]
8710 )
8711
8712 (define_insn "aarch64_st3<vstruct_elt>_dreg"
8713   [(set (match_operand:VSTRUCT_3DX 0 "aarch64_simd_struct_operand" "=Utv")
8714         (unspec:VSTRUCT_3DX [
8715                 (match_operand:VSTRUCT_3DX 1 "register_operand" "w")]
8716                 UNSPEC_ST3))]
8717   "TARGET_SIMD"
8718   "st1\\t{%S1.1d - %U1.1d}, %0"
8719   [(set_attr "type" "neon_store1_3reg")]
8720 )
8721
8722 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8723   [(set (match_operand:VSTRUCT_4DNX 0 "aarch64_simd_struct_operand" "=Utv")
8724         (unspec:VSTRUCT_4DNX [
8725                 (match_operand:VSTRUCT_4DNX 1 "register_operand" "w")]
8726                 UNSPEC_ST4))]
8727   "TARGET_SIMD"
8728   "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
8729   [(set_attr "type" "neon_store4_4reg")]
8730 )
8731
8732 (define_insn "aarch64_st4<vstruct_elt>_dreg"
8733   [(set (match_operand:VSTRUCT_4DX 0 "aarch64_simd_struct_operand" "=Utv")
8734         (unspec:VSTRUCT_4DX [
8735                 (match_operand:VSTRUCT_4DX 1 "register_operand" "w")]
8736                 UNSPEC_ST4))]
8737   "TARGET_SIMD"
8738   "st1\\t{%S1.1d - %V1.1d}, %0"
8739   [(set_attr "type" "neon_store1_4reg")]
8740 )
8741
8742 (define_expand "@aarch64_st<nregs><vstruct_elt>"
8743  [(match_operand:DI 0 "register_operand")
8744   (match_operand:VSTRUCT_D 1 "register_operand")]
8745   "TARGET_SIMD"
8746 {
8747   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8748   emit_insn (gen_aarch64_st<nregs><vstruct_elt>_dreg (mem, operands[1]));
8749   DONE;
8750 })
8751
8752 (define_expand "@aarch64_st<nregs><vstruct_elt>"
8753  [(match_operand:DI 0 "register_operand")
8754   (match_operand:VSTRUCT_Q 1 "register_operand")]
8755   "TARGET_SIMD"
8756 {
8757   rtx mem = gen_rtx_MEM (<MODE>mode, operands[0]);
8758   emit_insn (gen_aarch64_simd_st<nregs><vstruct_elt> (mem, operands[1]));
8759   DONE;
8760 })
8761
8762 (define_expand "@aarch64_st<nregs>_lane<vstruct_elt>"
8763  [(match_operand:DI 0 "register_operand")
8764   (match_operand:VSTRUCT_QD 1 "register_operand")
8765   (match_operand:SI 2 "immediate_operand")]
8766   "TARGET_SIMD"
8767 {
8768   rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
8769   set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) * <nregs>);
8770
8771   aarch64_simd_lane_bounds (operands[2], 0,
8772                 GET_MODE_NUNITS (<MODE>mode).to_constant () / <nregs>, NULL);
8773   emit_insn (gen_aarch64_vec_store_lanes<mode>_lane<vstruct_elt> (mem,
8774                                         operands[1], operands[2]));
8775   DONE;
8776 })
8777
8778 (define_expand "@aarch64_st1<VALL_F16:mode>"
8779  [(match_operand:DI 0 "register_operand")
8780   (match_operand:VALL_F16 1 "register_operand")]
8781   "TARGET_SIMD"
8782 {
8783   machine_mode mode = <VALL_F16:MODE>mode;
8784   rtx mem = gen_rtx_MEM (mode, operands[0]);
8785
8786   if (BYTES_BIG_ENDIAN)
8787     emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
8788   else
8789     emit_move_insn (mem, operands[1]);
8790   DONE;
8791 })
8792
8793 ;; Standard pattern name vec_init<mode><Vel>.
8794
8795 (define_expand "vec_init<mode><Vel>"
8796   [(match_operand:VALL_F16 0 "register_operand")
8797    (match_operand 1 "" "")]
8798   "TARGET_SIMD"
8799 {
8800   aarch64_expand_vector_init (operands[0], operands[1]);
8801   DONE;
8802 })
8803
8804 (define_expand "vec_init<mode><Vhalf>"
8805   [(match_operand:VQ_NO2E 0 "register_operand")
8806    (match_operand 1 "" "")]
8807   "TARGET_SIMD"
8808 {
8809   aarch64_expand_vector_init (operands[0], operands[1]);
8810   DONE;
8811 })
8812
8813 (define_insn "*aarch64_simd_ld1r<mode>"
8814   [(set (match_operand:VALL_F16 0 "register_operand" "=w")
8815         (vec_duplicate:VALL_F16
8816           (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
8817   "TARGET_SIMD"
8818   "ld1r\\t{%0.<Vtype>}, %1"
8819   [(set_attr "type" "neon_load1_all_lanes")]
8820 )
8821
8822 (define_insn "aarch64_simd_ld1<vstruct_elt>_x2"
8823   [(set (match_operand:VSTRUCT_2QD 0 "register_operand" "=w")
8824         (unspec:VSTRUCT_2QD [
8825             (match_operand:VSTRUCT_2QD 1 "aarch64_simd_struct_operand" "Utv")]
8826             UNSPEC_LD1))]
8827   "TARGET_SIMD"
8828   "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
8829   [(set_attr "type" "neon_load1_2reg<q>")]
8830 )
8831
8832
8833 (define_insn "@aarch64_frecpe<mode>"
8834   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8835         (unspec:VHSDF_HSDF
8836          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
8837          UNSPEC_FRECPE))]
8838   "TARGET_SIMD"
8839   "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
8840   [(set_attr "type" "neon_fp_recpe_<stype><q>")]
8841 )
8842
8843 (define_insn "aarch64_frecpx<mode>"
8844   [(set (match_operand:GPF_F16 0 "register_operand" "=w")
8845         (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
8846          UNSPEC_FRECPX))]
8847   "TARGET_SIMD"
8848   "frecpx\t%<s>0, %<s>1"
8849   [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
8850 )
8851
8852 (define_insn "@aarch64_frecps<mode>"
8853   [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
8854         (unspec:VHSDF_HSDF
8855           [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
8856           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
8857           UNSPEC_FRECPS))]
8858   "TARGET_SIMD"
8859   "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
8860   [(set_attr "type" "neon_fp_recps_<stype><q>")]
8861 )
8862
8863 (define_insn "aarch64_urecpe<mode>"
8864   [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
8865         (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
8866                 UNSPEC_URECPE))]
8867  "TARGET_SIMD"
8868  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
8869   [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
8870
8871 ;; Standard pattern name vec_extract<mode><Vel>.
8872
8873 (define_expand "vec_extract<mode><Vel>"
8874   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
8875    (match_operand:VALL_F16 1 "register_operand")
8876    (match_operand:SI 2 "immediate_operand")]
8877   "TARGET_SIMD"
8878 {
8879     emit_insn
8880       (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
8881     DONE;
8882 })
8883
8884 ;; Extract a 64-bit vector from one half of a 128-bit vector.
8885 (define_expand "vec_extract<mode><Vhalf>"
8886   [(match_operand:<VHALF> 0 "register_operand")
8887    (match_operand:VQMOV_NO2E 1 "register_operand")
8888    (match_operand 2 "immediate_operand")]
8889   "TARGET_SIMD"
8890 {
8891   int start = INTVAL (operands[2]);
8892   gcc_assert (start == 0 || start == 1);
8893   start *= <nunits> / 2;
8894   rtx sel = aarch64_gen_stepped_int_parallel (<nunits> / 2, start, 1);
8895   emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], sel));
8896   DONE;
8897 })
8898
8899 ;; Extract a single-element 64-bit vector from one half of a 128-bit vector.
8900 (define_expand "vec_extract<mode><V1half>"
8901   [(match_operand:<V1HALF> 0 "register_operand")
8902    (match_operand:VQ_2E 1 "register_operand")
8903    (match_operand 2 "immediate_operand")]
8904   "TARGET_SIMD"
8905 {
8906   /* V1DI and V1DF are rarely used by other patterns, so it should be better
8907      to hide it in a subreg destination of a normal DI or DF op.  */
8908   rtx scalar0 = gen_lowpart (<VHALF>mode, operands[0]);
8909   emit_insn (gen_vec_extract<mode><Vhalf> (scalar0, operands[1], operands[2]));
8910   DONE;
8911 })
8912
8913 ;; aes
8914
8915 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
8916   [(set (match_operand:V16QI 0 "register_operand" "=w")
8917         (unspec:V16QI
8918                 [(xor:V16QI
8919                  (match_operand:V16QI 1 "register_operand" "%0")
8920                  (match_operand:V16QI 2 "register_operand" "w"))]
8921          CRYPTO_AES))]
8922   "TARGET_AES"
8923   "aes<aes_op>\\t%0.16b, %2.16b"
8924   [(set_attr "type" "crypto_aese")]
8925 )
8926
8927 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
8928   [(set (match_operand:V16QI 0 "register_operand" "=w")
8929         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
8930          CRYPTO_AESMC))]
8931   "TARGET_AES"
8932   "aes<aesmc_op>\\t%0.16b, %1.16b"
8933   [(set_attr "type" "crypto_aesmc")]
8934 )
8935
8936 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
8937 ;; and enforce the register dependency without scheduling or register
8938 ;; allocation messing up the order or introducing moves inbetween.
8939 ;;  Mash the two together during combine.
8940
8941 (define_insn "*aarch64_crypto_aese_fused"
8942   [(set (match_operand:V16QI 0 "register_operand" "=w")
8943         (unspec:V16QI
8944           [(unspec:V16QI
8945            [(xor:V16QI
8946                 (match_operand:V16QI 1 "register_operand" "%0")
8947                 (match_operand:V16QI 2 "register_operand" "w"))]
8948              UNSPEC_AESE)]
8949         UNSPEC_AESMC))]
8950   "TARGET_AES
8951    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8952   "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
8953   [(set_attr "type" "crypto_aese")
8954    (set_attr "length" "8")]
8955 )
8956
8957 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
8958 ;; and enforce the register dependency without scheduling or register
8959 ;; allocation messing up the order or introducing moves inbetween.
8960 ;;  Mash the two together during combine.
8961
8962 (define_insn "*aarch64_crypto_aesd_fused"
8963   [(set (match_operand:V16QI 0 "register_operand" "=w")
8964         (unspec:V16QI
8965           [(unspec:V16QI
8966                     [(xor:V16QI
8967                         (match_operand:V16QI 1 "register_operand" "%0")
8968                         (match_operand:V16QI 2 "register_operand" "w"))]
8969                 UNSPEC_AESD)]
8970           UNSPEC_AESIMC))]
8971   "TARGET_AES
8972    && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
8973   "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
8974   [(set_attr "type" "crypto_aese")
8975    (set_attr "length" "8")]
8976 )
8977
8978 ;; sha1
8979
8980 (define_insn "aarch64_crypto_sha1hsi"
8981   [(set (match_operand:SI 0 "register_operand" "=w")
8982         (unspec:SI [(match_operand:SI 1
8983                        "register_operand" "w")]
8984          UNSPEC_SHA1H))]
8985   "TARGET_SHA2"
8986   "sha1h\\t%s0, %s1"
8987   [(set_attr "type" "crypto_sha1_fast")]
8988 )
8989
8990 (define_insn "aarch64_crypto_sha1hv4si"
8991   [(set (match_operand:SI 0 "register_operand" "=w")
8992         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
8993                      (parallel [(const_int 0)]))]
8994          UNSPEC_SHA1H))]
8995   "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
8996   "sha1h\\t%s0, %s1"
8997   [(set_attr "type" "crypto_sha1_fast")]
8998 )
8999
9000 (define_insn "aarch64_be_crypto_sha1hv4si"
9001   [(set (match_operand:SI 0 "register_operand" "=w")
9002         (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
9003                      (parallel [(const_int 3)]))]
9004          UNSPEC_SHA1H))]
9005   "TARGET_SHA2 && BYTES_BIG_ENDIAN"
9006   "sha1h\\t%s0, %s1"
9007   [(set_attr "type" "crypto_sha1_fast")]
9008 )
9009
9010 (define_insn "aarch64_crypto_sha1su1v4si"
9011   [(set (match_operand:V4SI 0 "register_operand" "=w")
9012         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9013                       (match_operand:V4SI 2 "register_operand" "w")]
9014          UNSPEC_SHA1SU1))]
9015   "TARGET_SHA2"
9016   "sha1su1\\t%0.4s, %2.4s"
9017   [(set_attr "type" "crypto_sha1_fast")]
9018 )
9019
9020 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
9021   [(set (match_operand:V4SI 0 "register_operand" "=w")
9022         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9023                       (match_operand:SI 2 "register_operand" "w")
9024                       (match_operand:V4SI 3 "register_operand" "w")]
9025          CRYPTO_SHA1))]
9026   "TARGET_SHA2"
9027   "sha1<sha1_op>\\t%q0, %s2, %3.4s"
9028   [(set_attr "type" "crypto_sha1_slow")]
9029 )
9030
9031 (define_insn "aarch64_crypto_sha1su0v4si"
9032   [(set (match_operand:V4SI 0 "register_operand" "=w")
9033         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9034                       (match_operand:V4SI 2 "register_operand" "w")
9035                       (match_operand:V4SI 3 "register_operand" "w")]
9036          UNSPEC_SHA1SU0))]
9037   "TARGET_SHA2"
9038   "sha1su0\\t%0.4s, %2.4s, %3.4s"
9039   [(set_attr "type" "crypto_sha1_xor")]
9040 )
9041
9042 ;; sha256
9043
9044 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
9045   [(set (match_operand:V4SI 0 "register_operand" "=w")
9046         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9047                       (match_operand:V4SI 2 "register_operand" "w")
9048                       (match_operand:V4SI 3 "register_operand" "w")]
9049          CRYPTO_SHA256))]
9050   "TARGET_SHA2"
9051   "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
9052   [(set_attr "type" "crypto_sha256_slow")]
9053 )
9054
9055 (define_insn "aarch64_crypto_sha256su0v4si"
9056   [(set (match_operand:V4SI 0 "register_operand" "=w")
9057         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9058                       (match_operand:V4SI 2 "register_operand" "w")]
9059          UNSPEC_SHA256SU0))]
9060   "TARGET_SHA2"
9061   "sha256su0\\t%0.4s, %2.4s"
9062   [(set_attr "type" "crypto_sha256_fast")]
9063 )
9064
9065 (define_insn "aarch64_crypto_sha256su1v4si"
9066   [(set (match_operand:V4SI 0 "register_operand" "=w")
9067         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9068                       (match_operand:V4SI 2 "register_operand" "w")
9069                       (match_operand:V4SI 3 "register_operand" "w")]
9070          UNSPEC_SHA256SU1))]
9071   "TARGET_SHA2"
9072   "sha256su1\\t%0.4s, %2.4s, %3.4s"
9073   [(set_attr "type" "crypto_sha256_slow")]
9074 )
9075
9076 ;; sha512
9077
9078 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
9079   [(set (match_operand:V2DI 0 "register_operand" "=w")
9080         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9081                       (match_operand:V2DI 2 "register_operand" "w")
9082                       (match_operand:V2DI 3 "register_operand" "w")]
9083          CRYPTO_SHA512))]
9084   "TARGET_SHA3"
9085   "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
9086   [(set_attr "type" "crypto_sha512")]
9087 )
9088
9089 (define_insn "aarch64_crypto_sha512su0qv2di"
9090   [(set (match_operand:V2DI 0 "register_operand" "=w")
9091         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9092                       (match_operand:V2DI 2 "register_operand" "w")]
9093          UNSPEC_SHA512SU0))]
9094   "TARGET_SHA3"
9095   "sha512su0\\t%0.2d, %2.2d"
9096   [(set_attr "type" "crypto_sha512")]
9097 )
9098
9099 (define_insn "aarch64_crypto_sha512su1qv2di"
9100   [(set (match_operand:V2DI 0 "register_operand" "=w")
9101         (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
9102                       (match_operand:V2DI 2 "register_operand" "w")
9103                       (match_operand:V2DI 3 "register_operand" "w")]
9104          UNSPEC_SHA512SU1))]
9105   "TARGET_SHA3"
9106   "sha512su1\\t%0.2d, %2.2d, %3.2d"
9107   [(set_attr "type" "crypto_sha512")]
9108 )
9109
9110 ;; sha3
9111
9112 (define_insn "eor3q<mode>4"
9113   [(set (match_operand:VQ_I 0 "register_operand" "=w")
9114         (xor:VQ_I
9115          (xor:VQ_I
9116           (match_operand:VQ_I 2 "register_operand" "w")
9117           (match_operand:VQ_I 3 "register_operand" "w"))
9118          (match_operand:VQ_I 1 "register_operand" "w")))]
9119   "TARGET_SHA3"
9120   "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
9121   [(set_attr "type" "crypto_sha3")]
9122 )
9123
9124 (define_insn "aarch64_rax1qv2di"
9125   [(set (match_operand:V2DI 0 "register_operand" "=w")
9126         (xor:V2DI
9127          (rotate:V2DI
9128           (match_operand:V2DI 2 "register_operand" "w")
9129           (const_int 1))
9130          (match_operand:V2DI 1 "register_operand" "w")))]
9131   "TARGET_SHA3"
9132   "rax1\\t%0.2d, %1.2d, %2.2d"
9133   [(set_attr "type" "crypto_sha3")]
9134 )
9135
9136 (define_insn "*aarch64_xarqv2di_insn"
9137   [(set (match_operand:V2DI 0 "register_operand" "=w")
9138         (rotate:V2DI
9139          (xor:V2DI
9140           (match_operand:V2DI 1 "register_operand" "%w")
9141           (match_operand:V2DI 2 "register_operand" "w"))
9142          (match_operand:V2DI 3 "aarch64_simd_lshift_imm" "Dl")))]
9143   "TARGET_SHA3"
9144   {
9145     operands[3]
9146       = GEN_INT (64 - INTVAL (unwrap_const_vec_duplicate (operands[3])));
9147     return "xar\\t%0.2d, %1.2d, %2.2d, %3";
9148   }
9149   [(set_attr "type" "crypto_sha3")]
9150 )
9151
9152 ;; The semantics of the vxarq_u64 intrinsics treat the immediate argument as a
9153 ;; right-rotate amount but the recommended representation of rotates by a
9154 ;; constant in RTL is with the left ROTATE code.  Translate between the
9155 ;; intrinsic-provided amount and the RTL operands in the expander here.
9156 ;; The define_insn for XAR will translate back to instruction semantics in its
9157 ;; output logic.
9158 (define_expand "aarch64_xarqv2di"
9159   [(set (match_operand:V2DI 0 "register_operand")
9160         (rotate:V2DI
9161          (xor:V2DI
9162           (match_operand:V2DI 1 "register_operand")
9163           (match_operand:V2DI 2 "register_operand"))
9164          (match_operand:SI 3 "aarch64_simd_shift_imm_di")))]
9165   "TARGET_SHA3"
9166   {
9167     operands[3]
9168       = aarch64_simd_gen_const_vector_dup (V2DImode,
9169                                            64 - INTVAL (operands[3]));
9170   }
9171 )
9172
9173 (define_insn "bcaxq<mode>4"
9174   [(set (match_operand:VQ_I 0 "register_operand" "=w")
9175         (xor:VQ_I
9176          (and:VQ_I
9177           (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
9178           (match_operand:VQ_I 2 "register_operand" "w"))
9179          (match_operand:VQ_I 1 "register_operand" "w")))]
9180   "TARGET_SHA3"
9181   "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
9182   [(set_attr "type" "crypto_sha3")]
9183 )
9184
9185 ;; SM3
9186
9187 (define_insn "aarch64_sm3ss1qv4si"
9188   [(set (match_operand:V4SI 0 "register_operand" "=w")
9189         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9190                       (match_operand:V4SI 2 "register_operand" "w")
9191                       (match_operand:V4SI 3 "register_operand" "w")]
9192          UNSPEC_SM3SS1))]
9193   "TARGET_SM4"
9194   "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
9195   [(set_attr "type" "crypto_sm3")]
9196 )
9197
9198
9199 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
9200   [(set (match_operand:V4SI 0 "register_operand" "=w")
9201         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9202                       (match_operand:V4SI 2 "register_operand" "w")
9203                       (match_operand:V4SI 3 "register_operand" "w")
9204                       (match_operand:SI 4 "aarch64_imm2" "Ui2")]
9205          CRYPTO_SM3TT))]
9206   "TARGET_SM4"
9207   "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
9208   [(set_attr "type" "crypto_sm3")]
9209 )
9210
9211 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
9212   [(set (match_operand:V4SI 0 "register_operand" "=w")
9213         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9214                       (match_operand:V4SI 2 "register_operand" "w")
9215                       (match_operand:V4SI 3 "register_operand" "w")]
9216          CRYPTO_SM3PART))]
9217   "TARGET_SM4"
9218   "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
9219   [(set_attr "type" "crypto_sm3")]
9220 )
9221
9222 ;; SM4
9223
9224 (define_insn "aarch64_sm4eqv4si"
9225   [(set (match_operand:V4SI 0 "register_operand" "=w")
9226         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
9227                       (match_operand:V4SI 2 "register_operand" "w")]
9228          UNSPEC_SM4E))]
9229   "TARGET_SM4"
9230   "sm4e\\t%0.4s, %2.4s"
9231   [(set_attr "type" "crypto_sm4")]
9232 )
9233
9234 (define_insn "aarch64_sm4ekeyqv4si"
9235   [(set (match_operand:V4SI 0 "register_operand" "=w")
9236         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
9237                       (match_operand:V4SI 2 "register_operand" "w")]
9238          UNSPEC_SM4EKEY))]
9239   "TARGET_SM4"
9240   "sm4ekey\\t%0.4s, %1.4s, %2.4s"
9241   [(set_attr "type" "crypto_sm4")]
9242 )
9243
9244 ;; fp16fml
9245
9246 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
9247   [(set (match_operand:VDQSF 0 "register_operand")
9248         (unspec:VDQSF
9249          [(match_operand:VDQSF 1 "register_operand")
9250           (match_operand:<VFMLA_W> 2 "register_operand")
9251           (match_operand:<VFMLA_W> 3 "register_operand")]
9252          VFMLA16_LOW))]
9253   "TARGET_F16FML"
9254 {
9255   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9256                                             <nunits> * 2, false);
9257   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
9258                                             <nunits> * 2, false);
9259
9260   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
9261                                                                 operands[1],
9262                                                                 operands[2],
9263                                                                 operands[3],
9264                                                                 p1, p2));
9265   DONE;
9266
9267 })
9268
9269 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
9270   [(set (match_operand:VDQSF 0 "register_operand")
9271         (unspec:VDQSF
9272          [(match_operand:VDQSF 1 "register_operand")
9273           (match_operand:<VFMLA_W> 2 "register_operand")
9274           (match_operand:<VFMLA_W> 3 "register_operand")]
9275          VFMLA16_HIGH))]
9276   "TARGET_F16FML"
9277 {
9278   rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9279   rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
9280
9281   emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
9282                                                                  operands[1],
9283                                                                  operands[2],
9284                                                                  operands[3],
9285                                                                  p1, p2));
9286   DONE;
9287 })
9288
9289 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
9290   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9291         (fma:VDQSF
9292          (float_extend:VDQSF
9293           (vec_select:<VFMLA_SEL_W>
9294            (match_operand:<VFMLA_W> 2 "register_operand" "w")
9295            (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
9296          (float_extend:VDQSF
9297           (vec_select:<VFMLA_SEL_W>
9298            (match_operand:<VFMLA_W> 3 "register_operand" "w")
9299            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9300          (match_operand:VDQSF 1 "register_operand" "0")))]
9301   "TARGET_F16FML"
9302   "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9303   [(set_attr "type" "neon_fp_mul_s")]
9304 )
9305
9306 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
9307   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9308         (fma:VDQSF
9309          (float_extend:VDQSF
9310           (neg:<VFMLA_SEL_W>
9311            (vec_select:<VFMLA_SEL_W>
9312             (match_operand:<VFMLA_W> 2 "register_operand" "w")
9313             (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
9314          (float_extend:VDQSF
9315           (vec_select:<VFMLA_SEL_W>
9316            (match_operand:<VFMLA_W> 3 "register_operand" "w")
9317            (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
9318          (match_operand:VDQSF 1 "register_operand" "0")))]
9319   "TARGET_F16FML"
9320   "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9321   [(set_attr "type" "neon_fp_mul_s")]
9322 )
9323
9324 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
9325   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9326         (fma:VDQSF
9327          (float_extend:VDQSF
9328           (vec_select:<VFMLA_SEL_W>
9329            (match_operand:<VFMLA_W> 2 "register_operand" "w")
9330            (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
9331          (float_extend:VDQSF
9332           (vec_select:<VFMLA_SEL_W>
9333            (match_operand:<VFMLA_W> 3 "register_operand" "w")
9334            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9335          (match_operand:VDQSF 1 "register_operand" "0")))]
9336   "TARGET_F16FML"
9337   "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9338   [(set_attr "type" "neon_fp_mul_s")]
9339 )
9340
9341 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
9342   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9343         (fma:VDQSF
9344          (float_extend:VDQSF
9345           (neg:<VFMLA_SEL_W>
9346            (vec_select:<VFMLA_SEL_W>
9347             (match_operand:<VFMLA_W> 2 "register_operand" "w")
9348             (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
9349          (float_extend:VDQSF
9350           (vec_select:<VFMLA_SEL_W>
9351            (match_operand:<VFMLA_W> 3 "register_operand" "w")
9352            (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
9353          (match_operand:VDQSF 1 "register_operand" "0")))]
9354   "TARGET_F16FML"
9355   "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
9356   [(set_attr "type" "neon_fp_mul_s")]
9357 )
9358
9359 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
9360   [(set (match_operand:V2SF 0 "register_operand")
9361         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9362                            (match_operand:V4HF 2 "register_operand")
9363                            (match_operand:V4HF 3 "register_operand")
9364                            (match_operand:SI 4 "aarch64_imm2")]
9365          VFMLA16_LOW))]
9366   "TARGET_F16FML"
9367 {
9368     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9369     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9370
9371     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
9372                                                             operands[1],
9373                                                             operands[2],
9374                                                             operands[3],
9375                                                             p1, lane));
9376     DONE;
9377 }
9378 )
9379
9380 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
9381   [(set (match_operand:V2SF 0 "register_operand")
9382         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9383                            (match_operand:V4HF 2 "register_operand")
9384                            (match_operand:V4HF 3 "register_operand")
9385                            (match_operand:SI 4 "aarch64_imm2")]
9386          VFMLA16_HIGH))]
9387   "TARGET_F16FML"
9388 {
9389     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9390     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9391
9392     emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
9393                                                              operands[1],
9394                                                              operands[2],
9395                                                              operands[3],
9396                                                              p1, lane));
9397     DONE;
9398 })
9399
9400 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
9401   [(set (match_operand:V2SF 0 "register_operand" "=w")
9402         (fma:V2SF
9403          (float_extend:V2SF
9404            (vec_select:V2HF
9405             (match_operand:V4HF 2 "register_operand" "w")
9406             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9407          (float_extend:V2SF
9408            (vec_duplicate:V2HF
9409             (vec_select:HF
9410              (match_operand:V4HF 3 "register_operand" "x")
9411              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9412          (match_operand:V2SF 1 "register_operand" "0")))]
9413   "TARGET_F16FML"
9414   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9415   [(set_attr "type" "neon_fp_mul_s")]
9416 )
9417
9418 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
9419   [(set (match_operand:V2SF 0 "register_operand" "=w")
9420         (fma:V2SF
9421          (float_extend:V2SF
9422           (neg:V2HF
9423            (vec_select:V2HF
9424             (match_operand:V4HF 2 "register_operand" "w")
9425             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9426          (float_extend:V2SF
9427           (vec_duplicate:V2HF
9428            (vec_select:HF
9429             (match_operand:V4HF 3 "register_operand" "x")
9430             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9431          (match_operand:V2SF 1 "register_operand" "0")))]
9432   "TARGET_F16FML"
9433   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9434   [(set_attr "type" "neon_fp_mul_s")]
9435 )
9436
9437 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
9438   [(set (match_operand:V2SF 0 "register_operand" "=w")
9439         (fma:V2SF
9440          (float_extend:V2SF
9441            (vec_select:V2HF
9442             (match_operand:V4HF 2 "register_operand" "w")
9443             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9444          (float_extend:V2SF
9445            (vec_duplicate:V2HF
9446             (vec_select:HF
9447              (match_operand:V4HF 3 "register_operand" "x")
9448              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9449          (match_operand:V2SF 1 "register_operand" "0")))]
9450   "TARGET_F16FML"
9451   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9452   [(set_attr "type" "neon_fp_mul_s")]
9453 )
9454
9455 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
9456   [(set (match_operand:V2SF 0 "register_operand" "=w")
9457         (fma:V2SF
9458          (float_extend:V2SF
9459            (neg:V2HF
9460             (vec_select:V2HF
9461              (match_operand:V4HF 2 "register_operand" "w")
9462              (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9463          (float_extend:V2SF
9464            (vec_duplicate:V2HF
9465             (vec_select:HF
9466              (match_operand:V4HF 3 "register_operand" "x")
9467              (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9468          (match_operand:V2SF 1 "register_operand" "0")))]
9469   "TARGET_F16FML"
9470   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9471   [(set_attr "type" "neon_fp_mul_s")]
9472 )
9473
9474 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
9475   [(set (match_operand:V4SF 0 "register_operand")
9476         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9477                            (match_operand:V8HF 2 "register_operand")
9478                            (match_operand:V8HF 3 "register_operand")
9479                            (match_operand:SI 4 "aarch64_lane_imm3")]
9480          VFMLA16_LOW))]
9481   "TARGET_F16FML"
9482 {
9483     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9484     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9485
9486     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
9487                                                               operands[1],
9488                                                               operands[2],
9489                                                               operands[3],
9490                                                               p1, lane));
9491     DONE;
9492 })
9493
9494 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
9495   [(set (match_operand:V4SF 0 "register_operand")
9496         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9497                            (match_operand:V8HF 2 "register_operand")
9498                            (match_operand:V8HF 3 "register_operand")
9499                            (match_operand:SI 4 "aarch64_lane_imm3")]
9500          VFMLA16_HIGH))]
9501   "TARGET_F16FML"
9502 {
9503     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9504     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9505
9506     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
9507                                                                operands[1],
9508                                                                operands[2],
9509                                                                operands[3],
9510                                                                p1, lane));
9511     DONE;
9512 })
9513
9514 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
9515   [(set (match_operand:V4SF 0 "register_operand" "=w")
9516         (fma:V4SF
9517          (float_extend:V4SF
9518           (vec_select:V4HF
9519             (match_operand:V8HF 2 "register_operand" "w")
9520             (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9521          (float_extend:V4SF
9522           (vec_duplicate:V4HF
9523            (vec_select:HF
9524             (match_operand:V8HF 3 "register_operand" "x")
9525             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9526          (match_operand:V4SF 1 "register_operand" "0")))]
9527   "TARGET_F16FML"
9528   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9529   [(set_attr "type" "neon_fp_mul_s")]
9530 )
9531
9532 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
9533   [(set (match_operand:V4SF 0 "register_operand" "=w")
9534         (fma:V4SF
9535           (float_extend:V4SF
9536            (neg:V4HF
9537             (vec_select:V4HF
9538              (match_operand:V8HF 2 "register_operand" "w")
9539              (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9540          (float_extend:V4SF
9541           (vec_duplicate:V4HF
9542            (vec_select:HF
9543             (match_operand:V8HF 3 "register_operand" "x")
9544             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9545          (match_operand:V4SF 1 "register_operand" "0")))]
9546   "TARGET_F16FML"
9547   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9548   [(set_attr "type" "neon_fp_mul_s")]
9549 )
9550
9551 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
9552   [(set (match_operand:V4SF 0 "register_operand" "=w")
9553         (fma:V4SF
9554          (float_extend:V4SF
9555           (vec_select:V4HF
9556             (match_operand:V8HF 2 "register_operand" "w")
9557             (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9558          (float_extend:V4SF
9559           (vec_duplicate:V4HF
9560            (vec_select:HF
9561             (match_operand:V8HF 3 "register_operand" "x")
9562             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9563          (match_operand:V4SF 1 "register_operand" "0")))]
9564   "TARGET_F16FML"
9565   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9566   [(set_attr "type" "neon_fp_mul_s")]
9567 )
9568
9569 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
9570   [(set (match_operand:V4SF 0 "register_operand" "=w")
9571         (fma:V4SF
9572          (float_extend:V4SF
9573           (neg:V4HF
9574            (vec_select:V4HF
9575             (match_operand:V8HF 2 "register_operand" "w")
9576             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9577          (float_extend:V4SF
9578           (vec_duplicate:V4HF
9579            (vec_select:HF
9580             (match_operand:V8HF 3 "register_operand" "x")
9581             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9582          (match_operand:V4SF 1 "register_operand" "0")))]
9583   "TARGET_F16FML"
9584   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9585   [(set_attr "type" "neon_fp_mul_s")]
9586 )
9587
9588 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
9589   [(set (match_operand:V2SF 0 "register_operand")
9590         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9591                       (match_operand:V4HF 2 "register_operand")
9592                       (match_operand:V8HF 3 "register_operand")
9593                       (match_operand:SI 4 "aarch64_lane_imm3")]
9594          VFMLA16_LOW))]
9595   "TARGET_F16FML"
9596 {
9597     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
9598     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9599
9600     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
9601                                                              operands[1],
9602                                                              operands[2],
9603                                                              operands[3],
9604                                                              p1, lane));
9605     DONE;
9606
9607 })
9608
9609 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
9610   [(set (match_operand:V2SF 0 "register_operand")
9611         (unspec:V2SF [(match_operand:V2SF 1 "register_operand")
9612                       (match_operand:V4HF 2 "register_operand")
9613                       (match_operand:V8HF 3 "register_operand")
9614                       (match_operand:SI 4 "aarch64_lane_imm3")]
9615          VFMLA16_HIGH))]
9616   "TARGET_F16FML"
9617 {
9618     rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
9619     rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
9620
9621     emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
9622                                                               operands[1],
9623                                                               operands[2],
9624                                                               operands[3],
9625                                                               p1, lane));
9626     DONE;
9627
9628 })
9629
9630 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
9631   [(set (match_operand:V2SF 0 "register_operand" "=w")
9632         (fma:V2SF
9633          (float_extend:V2SF
9634            (vec_select:V2HF
9635             (match_operand:V4HF 2 "register_operand" "w")
9636             (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
9637          (float_extend:V2SF
9638           (vec_duplicate:V2HF
9639            (vec_select:HF
9640             (match_operand:V8HF 3 "register_operand" "x")
9641             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9642          (match_operand:V2SF 1 "register_operand" "0")))]
9643   "TARGET_F16FML"
9644   "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
9645   [(set_attr "type" "neon_fp_mul_s")]
9646 )
9647
9648 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
9649   [(set (match_operand:V2SF 0 "register_operand" "=w")
9650         (fma:V2SF
9651          (float_extend:V2SF
9652           (neg:V2HF
9653            (vec_select:V2HF
9654             (match_operand:V4HF 2 "register_operand" "w")
9655             (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
9656          (float_extend:V2SF
9657           (vec_duplicate:V2HF
9658            (vec_select:HF
9659             (match_operand:V8HF 3 "register_operand" "x")
9660             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9661          (match_operand:V2SF 1 "register_operand" "0")))]
9662   "TARGET_F16FML"
9663   "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
9664   [(set_attr "type" "neon_fp_mul_s")]
9665 )
9666
9667 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
9668   [(set (match_operand:V2SF 0 "register_operand" "=w")
9669         (fma:V2SF
9670          (float_extend:V2SF
9671            (vec_select:V2HF
9672             (match_operand:V4HF 2 "register_operand" "w")
9673             (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
9674          (float_extend:V2SF
9675           (vec_duplicate:V2HF
9676            (vec_select:HF
9677             (match_operand:V8HF 3 "register_operand" "x")
9678             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9679          (match_operand:V2SF 1 "register_operand" "0")))]
9680   "TARGET_F16FML"
9681   "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
9682   [(set_attr "type" "neon_fp_mul_s")]
9683 )
9684
9685 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
9686   [(set (match_operand:V2SF 0 "register_operand" "=w")
9687         (fma:V2SF
9688          (float_extend:V2SF
9689           (neg:V2HF
9690            (vec_select:V2HF
9691             (match_operand:V4HF 2 "register_operand" "w")
9692             (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
9693          (float_extend:V2SF
9694           (vec_duplicate:V2HF
9695            (vec_select:HF
9696             (match_operand:V8HF 3 "register_operand" "x")
9697             (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
9698          (match_operand:V2SF 1 "register_operand" "0")))]
9699   "TARGET_F16FML"
9700   "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
9701   [(set_attr "type" "neon_fp_mul_s")]
9702 )
9703
9704 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
9705   [(set (match_operand:V4SF 0 "register_operand")
9706         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9707                       (match_operand:V8HF 2 "register_operand")
9708                       (match_operand:V4HF 3 "register_operand")
9709                       (match_operand:SI 4 "aarch64_imm2")]
9710          VFMLA16_LOW))]
9711   "TARGET_F16FML"
9712 {
9713     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
9714     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9715
9716     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
9717                                                              operands[1],
9718                                                              operands[2],
9719                                                              operands[3],
9720                                                              p1, lane));
9721     DONE;
9722 })
9723
9724 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
9725   [(set (match_operand:V4SF 0 "register_operand")
9726         (unspec:V4SF [(match_operand:V4SF 1 "register_operand")
9727                       (match_operand:V8HF 2 "register_operand")
9728                       (match_operand:V4HF 3 "register_operand")
9729                       (match_operand:SI 4 "aarch64_imm2")]
9730          VFMLA16_HIGH))]
9731   "TARGET_F16FML"
9732 {
9733     rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
9734     rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
9735
9736     emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
9737                                                               operands[1],
9738                                                               operands[2],
9739                                                               operands[3],
9740                                                               p1, lane));
9741     DONE;
9742 })
9743
9744 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
9745   [(set (match_operand:V4SF 0 "register_operand" "=w")
9746         (fma:V4SF
9747          (float_extend:V4SF
9748           (vec_select:V4HF
9749            (match_operand:V8HF 2 "register_operand" "w")
9750            (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
9751          (float_extend:V4SF
9752           (vec_duplicate:V4HF
9753            (vec_select:HF
9754             (match_operand:V4HF 3 "register_operand" "x")
9755             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9756          (match_operand:V4SF 1 "register_operand" "0")))]
9757   "TARGET_F16FML"
9758   "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
9759   [(set_attr "type" "neon_fp_mul_s")]
9760 )
9761
9762 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
9763   [(set (match_operand:V4SF 0 "register_operand" "=w")
9764         (fma:V4SF
9765          (float_extend:V4SF
9766           (neg:V4HF
9767            (vec_select:V4HF
9768             (match_operand:V8HF 2 "register_operand" "w")
9769             (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
9770          (float_extend:V4SF
9771           (vec_duplicate:V4HF
9772            (vec_select:HF
9773             (match_operand:V4HF 3 "register_operand" "x")
9774             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9775          (match_operand:V4SF 1 "register_operand" "0")))]
9776   "TARGET_F16FML"
9777   "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
9778   [(set_attr "type" "neon_fp_mul_s")]
9779 )
9780
9781 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
9782   [(set (match_operand:V4SF 0 "register_operand" "=w")
9783         (fma:V4SF
9784          (float_extend:V4SF
9785           (vec_select:V4HF
9786            (match_operand:V8HF 2 "register_operand" "w")
9787            (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
9788          (float_extend:V4SF
9789           (vec_duplicate:V4HF
9790            (vec_select:HF
9791             (match_operand:V4HF 3 "register_operand" "x")
9792             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9793          (match_operand:V4SF 1 "register_operand" "0")))]
9794   "TARGET_F16FML"
9795   "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
9796   [(set_attr "type" "neon_fp_mul_s")]
9797 )
9798
9799 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
9800   [(set (match_operand:V4SF 0 "register_operand" "=w")
9801         (fma:V4SF
9802          (float_extend:V4SF
9803           (neg:V4HF
9804            (vec_select:V4HF
9805             (match_operand:V8HF 2 "register_operand" "w")
9806             (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
9807          (float_extend:V4SF
9808           (vec_duplicate:V4HF
9809            (vec_select:HF
9810             (match_operand:V4HF 3 "register_operand" "x")
9811             (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
9812          (match_operand:V4SF 1 "register_operand" "0")))]
9813   "TARGET_F16FML"
9814   "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
9815   [(set_attr "type" "neon_fp_mul_s")]
9816 )
9817
9818 ;; pmull
9819
9820 (define_insn "aarch64_crypto_pmulldi"
9821   [(set (match_operand:TI 0 "register_operand" "=w")
9822         (unspec:TI  [(match_operand:DI 1 "register_operand" "w")
9823                      (match_operand:DI 2 "register_operand" "w")]
9824                     UNSPEC_PMULL))]
9825  "TARGET_AES"
9826  "pmull\\t%0.1q, %1.1d, %2.1d"
9827   [(set_attr "type" "crypto_pmull")]
9828 )
9829
9830 (define_insn "aarch64_crypto_pmullv2di"
9831  [(set (match_operand:TI 0 "register_operand" "=w")
9832        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
9833                    (match_operand:V2DI 2 "register_operand" "w")]
9834                   UNSPEC_PMULL2))]
9835   "TARGET_AES"
9836   "pmull2\\t%0.1q, %1.2d, %2.2d"
9837   [(set_attr "type" "crypto_pmull")]
9838 )
9839
9840 ;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector.
9841 (define_insn_and_split "<optab><Vnarrowq><mode>2"
9842   [(set (match_operand:VQN 0 "register_operand" "=w")
9843         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9844   "TARGET_SIMD"
9845   "<su>xtl\t%0.<Vtype>, %1.<Vntype>"
9846   "&& <CODE> == ZERO_EXTEND
9847    && aarch64_split_simd_shift_p (insn)"
9848   [(const_int 0)]
9849   {
9850     /* On many cores, it is cheaper to implement UXTL using a ZIP1 with zero,
9851        provided that the cost of the zero can be amortized over several
9852        operations.  We'll later recombine the zero and zip if there are
9853        not sufficient uses of the zero to make the split worthwhile.  */
9854     rtx res = simplify_gen_subreg (<VNARROWQ2>mode, operands[0],
9855                                    <MODE>mode, 0);
9856     rtx zero = aarch64_gen_shareable_zero (<VNARROWQ>mode);
9857     emit_insn (gen_aarch64_zip1<Vnarrowq2>_low (res, operands[1], zero));
9858     DONE;
9859   }
9860   [(set_attr "type" "neon_shift_imm_long")]
9861 )
9862
9863 (define_expand "aarch64_<su>xtl<mode>"
9864   [(set (match_operand:VQN 0 "register_operand" "=w")
9865         (ANY_EXTEND:VQN (match_operand:<VNARROWQ> 1 "register_operand" "w")))]
9866   "TARGET_SIMD"
9867   ""
9868 )
9869
9870 ;; Truncate a 128-bit integer vector to a 64-bit vector.
9871 (define_insn "trunc<mode><Vnarrowq>2<vczle><vczbe>"
9872   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
9873         (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
9874   "TARGET_SIMD"
9875   "xtn\t%0.<Vntype>, %1.<Vtype>"
9876   [(set_attr "type" "neon_move_narrow_q")]
9877 )
9878
9879 ;; Expander for the intrinsics that only takes one mode unlike the two-mode
9880 ;; trunc optab.
9881 (define_expand "aarch64_xtn<mode>"
9882   [(set (match_operand:<VNARROWQ> 0 "register_operand")
9883        (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
9884   "TARGET_SIMD"
9885   {}
9886 )
9887
9888 (define_insn "aarch64_bfdot<mode>"
9889   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9890         (plus:VDQSF
9891           (unspec:VDQSF
9892            [(match_operand:<VBFMLA_W> 2 "register_operand" "w")
9893             (match_operand:<VBFMLA_W> 3 "register_operand" "w")]
9894             UNSPEC_BFDOT)
9895           (match_operand:VDQSF 1 "register_operand" "0")))]
9896   "TARGET_BF16_SIMD"
9897   "bfdot\t%0.<Vtype>, %2.<Vbfdottype>, %3.<Vbfdottype>"
9898   [(set_attr "type" "neon_dot<q>")]
9899 )
9900
9901 (define_insn "aarch64_bfdot_lane<VBF:isquadop><VDQSF:mode>"
9902   [(set (match_operand:VDQSF 0 "register_operand" "=w")
9903         (plus:VDQSF
9904           (unspec:VDQSF
9905            [(match_operand:<VDQSF:VBFMLA_W> 2 "register_operand" "w")
9906             (match_operand:VBF 3 "register_operand" "w")
9907             (match_operand:SI 4 "const_int_operand" "n")]
9908             UNSPEC_BFDOT)
9909           (match_operand:VDQSF 1 "register_operand" "0")))]
9910   "TARGET_BF16_SIMD"
9911 {
9912   int nunits = GET_MODE_NUNITS (<VBF:MODE>mode).to_constant ();
9913   int lane = INTVAL (operands[4]);
9914   operands[4] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane), SImode);
9915   return "bfdot\t%0.<VDQSF:Vtype>, %2.<VDQSF:Vbfdottype>, %3.2h[%4]";
9916 }
9917   [(set_attr "type" "neon_dot<VDQSF:q>")]
9918 )
9919
9920 ;; bfmmla
9921 (define_insn "aarch64_bfmmlaqv4sf"
9922   [(set (match_operand:V4SF 0 "register_operand" "=w")
9923         (plus:V4SF (match_operand:V4SF 1 "register_operand" "0")
9924                    (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9925                                  (match_operand:V8BF 3 "register_operand" "w")]
9926                     UNSPEC_BFMMLA)))]
9927   "TARGET_BF16_SIMD"
9928   "bfmmla\\t%0.4s, %2.8h, %3.8h"
9929   [(set_attr "type" "neon_fp_mla_s_q")]
9930 )
9931
9932 ;; bfmlal<bt>
9933 (define_insn "aarch64_bfmlal<bt>v4sf"
9934   [(set (match_operand:V4SF 0 "register_operand" "=w")
9935         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9936                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9937                                   (match_operand:V8BF 3 "register_operand" "w")]
9938                      BF_MLA)))]
9939   "TARGET_BF16_SIMD"
9940   "bfmlal<bt>\\t%0.4s, %2.8h, %3.8h"
9941   [(set_attr "type" "neon_fp_mla_s_q")]
9942 )
9943
9944 (define_insn "aarch64_bfmlal<bt>_lane<q>v4sf"
9945   [(set (match_operand:V4SF 0 "register_operand" "=w")
9946         (plus: V4SF (match_operand:V4SF 1 "register_operand" "0")
9947                     (unspec:V4SF [(match_operand:V8BF 2 "register_operand" "w")
9948                                   (match_operand:VBF 3 "register_operand" "x")
9949                                   (match_operand:SI 4 "const_int_operand" "n")]
9950                      BF_MLA)))]
9951   "TARGET_BF16_SIMD"
9952 {
9953   operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
9954   return "bfmlal<bt>\\t%0.4s, %2.8h, %3.h[%4]";
9955 }
9956   [(set_attr "type" "neon_fp_mla_s_scalar_q")]
9957 )
9958
9959 ;; 8-bit integer matrix multiply-accumulate
9960 (define_insn "aarch64_simd_<sur>mmlav16qi"
9961   [(set (match_operand:V4SI 0 "register_operand" "=w")
9962         (plus:V4SI
9963          (unspec:V4SI [(match_operand:V16QI 2 "register_operand" "w")
9964                        (match_operand:V16QI 3 "register_operand" "w")] MATMUL)
9965          (match_operand:V4SI 1 "register_operand" "0")))]
9966   "TARGET_I8MM"
9967   "<sur>mmla\\t%0.4s, %2.16b, %3.16b"
9968   [(set_attr "type" "neon_mla_s_q")]
9969 )
9970
9971 ;; bfcvtn
9972 (define_insn "aarch64_bfcvtn<q><mode>"
9973   [(set (match_operand:V4SF_TO_BF 0 "register_operand" "=w")
9974         (unspec:V4SF_TO_BF [(match_operand:V4SF 1 "register_operand" "w")]
9975                             UNSPEC_BFCVTN))]
9976   "TARGET_BF16_SIMD"
9977   "bfcvtn\\t%0.4h, %1.4s"
9978   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9979 )
9980
9981 (define_insn "aarch64_bfcvtn2v8bf"
9982   [(set (match_operand:V8BF 0 "register_operand" "=w")
9983         (unspec:V8BF [(match_operand:V8BF 1 "register_operand" "0")
9984                       (match_operand:V4SF 2 "register_operand" "w")]
9985                       UNSPEC_BFCVTN2))]
9986   "TARGET_BF16_SIMD"
9987   "bfcvtn2\\t%0.8h, %2.4s"
9988   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
9989 )
9990
9991 (define_insn "aarch64_bfcvtbf"
9992   [(set (match_operand:BF 0 "register_operand" "=w")
9993         (unspec:BF [(match_operand:SF 1 "register_operand" "w")]
9994                     UNSPEC_BFCVT))]
9995   "TARGET_BF16_FP"
9996   "bfcvt\\t%h0, %s1"
9997   [(set_attr "type" "f_cvt")]
9998 )
9999
10000 ;; Use shl/shll/shll2 to convert BF scalar/vector modes to SF modes.
10001 (define_insn "aarch64_vbfcvt<mode>"
10002   [(set (match_operand:V4SF 0 "register_operand" "=w")
10003         (unspec:V4SF [(match_operand:VBF 1 "register_operand" "w")]
10004                       UNSPEC_BFCVTN))]
10005   "TARGET_BF16_SIMD"
10006   "shll\\t%0.4s, %1.4h, #16"
10007   [(set_attr "type" "neon_shift_imm_long")]
10008 )
10009
10010 (define_insn "aarch64_vbfcvt_highv8bf"
10011   [(set (match_operand:V4SF 0 "register_operand" "=w")
10012         (unspec:V4SF [(match_operand:V8BF 1 "register_operand" "w")]
10013                       UNSPEC_BFCVTN2))]
10014   "TARGET_BF16_SIMD"
10015   "shll2\\t%0.4s, %1.8h, #16"
10016   [(set_attr "type" "neon_shift_imm_long")]
10017 )
10018
10019 (define_insn "aarch64_bfcvtsf"
10020   [(set (match_operand:SF 0 "register_operand" "=w")
10021         (unspec:SF [(match_operand:BF 1 "register_operand" "w")]
10022                     UNSPEC_BFCVT))]
10023   "TARGET_BF16_FP"
10024   "shl\\t%d0, %d1, #16"
10025   [(set_attr "type" "neon_shift_imm")]
10026 )
10027
10028 ;; faminmax
10029 (define_insn "@aarch64_<faminmax_uns_op><mode>"
10030   [(set (match_operand:VHSDF 0 "register_operand" "=w")
10031         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
10032                        (match_operand:VHSDF 2 "register_operand" "w")]
10033                       FAMINMAX_UNS))]
10034   "TARGET_FAMINMAX"
10035   "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10036 )
10037
10038 (define_insn "*aarch64_faminmax_fused"
10039   [(set (match_operand:VHSDF 0 "register_operand" "=w")
10040         (FMAXMIN:VHSDF
10041           (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
10042           (abs:VHSDF (match_operand:VHSDF 2 "register_operand" "w"))))]
10043   "TARGET_FAMINMAX"
10044   "<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10045 )
10046
10047 (define_insn "@aarch64_lut<VLUT:mode><VB:mode>"
10048   [(set (match_operand:<VLUT:VCONQ> 0 "register_operand" "=w")
10049         (unspec:<VLUT:VCONQ>
10050          [(match_operand:VLUT 1 "register_operand" "w")
10051           (match_operand:VB 2 "register_operand" "w")
10052           (match_operand:SI 3 "const_int_operand")
10053           (match_operand:SI 4 "const_int_operand")]
10054          UNSPEC_LUTI))]
10055   "TARGET_LUT && INTVAL (operands[4]) <= exact_log2 (<VLUT:nunits>)"
10056   "luti%4\t%0<VLUT:Vconqtype>, {%1<VLUT:Vconqtype>}, %2[%3]"
10057 )
10058
10059 ;; lutx2
10060 (define_insn "@aarch64_lut<VLUTx2:mode><VB:mode>"
10061   [(set (match_operand:<VSTRUCT_ELT> 0 "register_operand" "=w")
10062         (unspec:<VSTRUCT_ELT>
10063          [(match_operand:VLUTx2 1 "register_operand" "w")
10064           (match_operand:VB 2 "register_operand" "w")
10065           (match_operand:SI 3 "const_int_operand")
10066           (match_operand:SI 4 "const_int_operand")]
10067          UNSPEC_LUTI))]
10068   "TARGET_LUT && INTVAL (operands[4]) == 4"
10069   "luti%4\t%0.8h, {%S1.8h, %T1.8h}, %2[%3]"
10070 )
10071
10072 ;; fpm unary instructions (low part).
10073 (define_insn "@aarch64_<insn><mode>"
10074   [(set (match_operand:VQ_BHF 0 "register_operand" "=w")
10075         (unspec:VQ_BHF
10076          [(match_operand:V8QI 1 "register_operand" "w")
10077           (reg:DI FPM_REGNUM)]
10078         FPM_UNARY_UNS))]
10079   "TARGET_FP8"
10080   "<b><insn>\t%0.<Vtype>, %1.8b"
10081 )
10082
10083 ;; fpm unary instructions (high part).
10084 (define_insn "@aarch64_<insn><mode>_high"
10085   [(set (match_operand:VQ_BHF 0 "register_operand" "=w")
10086         (unspec:VQ_BHF
10087          [(vec_select:V8QI
10088             (match_operand:V16QI 1 "register_operand" "w")
10089             (match_operand:V16QI 2 "vect_par_cnst_hi_half"))
10090           (reg:DI FPM_REGNUM)]
10091         FPM_UNARY_UNS))]
10092   "TARGET_FP8"
10093   "<b><insn>2\t%0.<Vtype>, %1.16b"
10094 )
10095
10096 ;; fpm binary instructions.
10097 (define_insn "@aarch64_<insn><mode>"
10098   [(set (match_operand:<VPACKB> 0 "register_operand" "=w")
10099         (unspec:<VPACKB>
10100          [(match_operand:VCVTFPM 1 "register_operand" "w")
10101           (match_operand:VCVTFPM 2 "register_operand" "w")
10102           (reg:DI FPM_REGNUM)]
10103          FPM_BINARY_UNS))]
10104   "TARGET_FP8"
10105   "<insn>\t%0.<VPACKBtype>, %1.<Vtype>, %2.<Vtype>"
10106 )
10107
10108 ;; fpm binary instructions & merge with low.
10109 (define_insn "@aarch64_<insn><mode>_high_le"
10110   [(set (match_operand:V16QI 0 "register_operand" "=w")
10111         (vec_concat:V16QI
10112           (match_operand:V8QI 1 "register_operand" "0")
10113           (unspec:V8QI
10114             [(match_operand:V4SF_ONLY 2 "register_operand" "w")
10115              (match_operand:V4SF_ONLY 3 "register_operand" "w")
10116              (reg:DI FPM_REGNUM)]
10117             FPM_BINARY_UNS)))]
10118   "TARGET_FP8 && !BYTES_BIG_ENDIAN"
10119   "<insn>2\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
10120 )
10121
10122 (define_insn "@aarch64_<insn><mode>_high_be"
10123   [(set (match_operand:V16QI 0 "register_operand" "=w")
10124         (vec_concat:V16QI
10125           (unspec:V8QI
10126             [(match_operand:V4SF_ONLY 2 "register_operand" "w")
10127              (match_operand:V4SF_ONLY 3 "register_operand" "w")
10128              (reg:DI FPM_REGNUM)]
10129             FPM_BINARY_UNS)
10130           (match_operand:V8QI 1 "register_operand" "0")))]
10131   "TARGET_FP8 && BYTES_BIG_ENDIAN"
10132   "<insn>2\t%1.16b, %2.<V4SF_ONLY:Vtype>, %3.<V4SF_ONLY:Vtype>";
10133 )
10134
10135 ;; fscale instructions
10136 (define_insn "@aarch64_<insn><mode>"
10137   [(set (match_operand:VHSDF 0 "register_operand" "=w")
10138         (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
10139                        (match_operand:<FCVT_TARGET> 2 "register_operand" "w")]
10140                       FSCALE_UNS))]
10141   "TARGET_FP8"
10142   "<insn>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
10143 )
10144
10145 ;; fpm vdot instructions.  The target requirements are enforced by
10146 ;; VDQ_HSF_FDOT.
10147 (define_insn "@aarch64_<insn><mode>"
10148   [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
10149         (unspec:VDQ_HSF_FDOT
10150          [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
10151           (match_operand:<VNARROWB> 2 "register_operand" "w")
10152           (match_operand:<VNARROWB> 3 "register_operand" "w")
10153           (reg:DI FPM_REGNUM)]
10154          FPM_FDOT))]
10155   ""
10156   "<insn>\t%1.<Vtype>, %2.<Vnbtype>, %3.<Vnbtype>"
10157 )
10158
10159 (define_insn "@aarch64_<insn>_lane<VDQ_HSF_FDOT:mode><VB:mode>"
10160   [(set (match_operand:VDQ_HSF_FDOT 0 "register_operand" "=w")
10161         (unspec:VDQ_HSF_FDOT
10162          [(match_operand:VDQ_HSF_FDOT 1 "register_operand" "0")
10163           (match_operand:<VDQ_HSF_FDOT:VNARROWB> 2 "register_operand" "w")
10164           (match_operand:VB 3 "register_operand" "w")
10165           (match_operand 4 "const_int_operand")
10166           (reg:DI FPM_REGNUM)]
10167          FPM_FDOT_LANE))]
10168   ""
10169   "<insn>\t%1.<VDQ_HSF_FDOT:Vtype>, %2.<VDQ_HSF_FDOT:Vnbtype>, %3.<VDQ_HSF_FDOT:Vnbsubtype>[%4]"
10170 )
10171
10172 ;; fpm fma instructions.
10173 (define_insn "@aarch64_<insn><mode>"
10174   [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
10175         (unspec:V8HF_ONLY
10176          [(match_operand:V8HF_ONLY 1 "register_operand" "0")
10177           (match_operand:V16QI 2 "register_operand" "w")
10178           (match_operand:V16QI 3 "register_operand" "w")
10179           (reg:DI FPM_REGNUM)]
10180         FMLAL_FP8_HF))]
10181   "TARGET_FP8FMA"
10182   "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
10183 )
10184
10185 (define_insn "@aarch64_<insn>_lane<V8HF_ONLY:mode><VB:mode>"
10186   [(set (match_operand:V8HF_ONLY 0 "register_operand" "=w")
10187         (unspec:V8HF_ONLY
10188          [(match_operand:V8HF_ONLY 1 "register_operand" "0")
10189           (match_operand:V16QI 2 "register_operand" "w")
10190           (vec_duplicate:V16QI
10191             (vec_select:QI
10192               (match_operand:VB 3 "register_operand" "w")
10193               (parallel [(match_operand:SI 4 "immediate_operand")])))
10194           (reg:DI FPM_REGNUM)]
10195         FMLAL_FP8_HF))]
10196   "TARGET_FP8FMA"
10197   {
10198     operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
10199                                            INTVAL (operands[4]));
10200     return "<insn>\t%0.<V8HF_ONLY:Vtype>, %2.16b, %3.b[%4]";
10201   }
10202 )
10203
10204 (define_insn "@aarch64_<insn><mode>"
10205   [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
10206         (unspec:V4SF_ONLY
10207          [(match_operand:V4SF_ONLY 1 "register_operand" "0")
10208           (match_operand:V16QI 2 "register_operand" "w")
10209           (match_operand:V16QI 3 "register_operand" "w")
10210           (reg:DI FPM_REGNUM)]
10211         FMLALL_FP8_SF))]
10212   "TARGET_FP8FMA"
10213   "<insn>\t%0.<Vtype>, %2.16b, %3.16b"
10214 )
10215
10216 (define_insn "@aarch64_<insn>_lane<V4SF_ONLY:mode><VB:mode>"
10217   [(set (match_operand:V4SF_ONLY 0 "register_operand" "=w")
10218         (unspec:V4SF_ONLY
10219          [(match_operand:V4SF_ONLY 1 "register_operand" "0")
10220           (match_operand:V16QI 2 "register_operand" "w")
10221           (vec_duplicate:V16QI
10222             (vec_select:QI
10223               (match_operand:VB 3 "register_operand" "w")
10224               (parallel [(match_operand:SI 4 "immediate_operand")])))
10225           (reg:DI FPM_REGNUM)]
10226         FMLALL_FP8_SF))]
10227   "TARGET_FP8FMA"
10228   {
10229     operands[4] = aarch64_endian_lane_rtx (<VB:MODE>mode,
10230                                            INTVAL (operands[4]));
10231     return "<insn>\t%0.<V4SF_ONLY:Vtype>, %2.16b, %3.b[%4]";
10232   }
10233 )