gcc/config/aarch64/aarch64-sve2.md

   1 ;; Machine description for AArch64 SVE2.
   2 ;; Copyright (C) 2019-2025 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; The file is organised into the following sections (search for the full
  22 ;; line):
  23 ;;
  24 ;; == Moves
  25 ;; ---- Predicate to vector moves
  26 ;; ---- Vector to predicate moves
  27 ;;
  28 ;; == Loads
  29 ;; ---- 128-bit extending loads
  30 ;; ---- 128-bit structure loads
  31 ;; ---- Multi-register loads predicated by a counter
  32 ;; ---- 128-bit gather loads
  33 ;; ---- Non-temporal gather loads
  34 ;;
  35 ;; == Stores
  36 ;; ---- 128-bit truncating stores
  37 ;; ---- 128-bit structure stores
  38 ;; ---- Multi-register stores predicated by a counter
  39 ;; ---- 128-bit scatter stores
  40 ;; ---- Non-temporal scatter stores
  41 ;;
  42 ;; == Predicate manipulation
  43 ;; ---- [PRED] Predicate-as-counter PTRUE
  44 ;; ---- [PRED] Predicate extraction
  45 ;; ---- [PRED] Predicate selection
  46 ;; ---- [PRED] Predicate count
  47 ;;
  48 ;; == Uniform unary arithmnetic
  49 ;; ---- [FP] Multi-register unary operations
  50 ;;
  51 ;; == Uniform binary arithmnetic
  52 ;; ---- [INT] Multi-register operations
  53 ;; ---- [INT] Clamp to minimum/maximum
  54 ;; ---- [INT] Multiplication
  55 ;; ---- [INT] Scaled high-part multiplication
  56 ;; ---- [INT] General binary arithmetic that maps to unspecs
  57 ;; ---- [INT] Saturating binary arithmetic
  58 ;; ---- [INT] Saturating left shifts
  59 ;; ---- [FP] Non-widening bfloat16 arithmetic
  60 ;; ---- [FP] Clamp to minimum/maximum
  61 ;;
  62 ;; == Uniform ternary arithmnetic
  63 ;; ---- [INT] General ternary arithmetic that maps to unspecs
  64 ;; ---- [INT] Multiply-and-accumulate operations
  65 ;; ---- [INT] Binary logic operations with rotation
  66 ;; ---- [INT] Ternary logic operations
  67 ;; ---- [INT] Shift-and-accumulate operations
  68 ;; ---- [INT] Shift-and-insert operations
  69 ;; ---- [INT] Sum of absolute differences
  70 ;; ---- [FP] Mfloat8 Multiply-and-accumulate operations
  71 ;; ---- [FP] Mfloat8 dot products
  72 ;;
  73 ;; == Extending arithmetic
  74 ;; ---- [INT] Multi-register widening conversions
  75 ;; ---- [INT] Wide binary arithmetic
  76 ;; ---- [INT] Long binary arithmetic
  77 ;; ---- [INT] Long left shifts
  78 ;; ---- [INT] Long binary arithmetic with accumulation
  79 ;; ---- [FP] Multi-register operations
  80 ;; ---- [FP] Long multiplication with accumulation
  81 ;;
  82 ;; == Narrowing arithnetic
  83 ;; ---- [INT] Narrowing unary arithmetic
  84 ;; ---- [INT] Multi-vector narrowing unary arithmetic
  85 ;; ---- [INT] Narrowing binary arithmetic
  86 ;; ---- [INT] Narrowing right shifts
  87 ;; ---- [INT] Multi-vector narrowing right shifts
  88 ;;
  89 ;; == Pairwise arithmetic
  90 ;; ---- [INT] Pairwise arithmetic
  91 ;; ---- [FP] Pairwise arithmetic
  92 ;; ---- [INT] Pairwise arithmetic with accumulation
  93 ;;
  94 ;; == Complex arithmetic
  95 ;; ---- [INT] Complex binary operations
  96 ;; ---- [INT] Complex ternary operations
  97 ;; ---- [INT] Complex dot product
  98 ;;
  99 ;; == Conversions
 100 ;; ---- [FP<-FP] Widening conversions
 101 ;; ---- [FP<-FP] Narrowing conversions
 102 ;; ---- [FP<-FP] Multi-vector widening conversions
 103 ;; ---- [FP<-FP] Multi-vector narrowing conversions
 104 ;; ---- [FP<-INT] Multi-vector conversions
 105 ;; ---- [INT<-FP] Multi-vector conversions
 106 ;;
 107 ;; == Other arithmetic
 108 ;; ---- [INT] Reciprocal approximation
 109 ;; ---- [INT<-FP] Base-2 logarithm
 110 ;; ---- [INT] Polynomial multiplication
 111 ;;
 112 ;; == Comparisons and selects
 113 ;; ---- [INT,FP] Select based on predicates as counters
 114 ;; ---- [INT] While tests
 115 ;;
 116 ;; == Reductions
 117 ;; ---- [INT] Reduction to 128-bit vector
 118 ;; ---- [FP] Reduction to 128-bit vector
 119 ;;
 120 ;; == Permutation
 121 ;; ---- [INT,FP] Reversal
 122 ;; ---- [INT,FP] HVLA permutes
 123 ;; ---- [INT,FP] General permutes
 124 ;; ---- [INT,FP] Multi-register permutes
 125 ;; ---- [INT] Optional bit-permute extensions
 126 ;;
 127 ;; == General
 128 ;; ---- Check for aliases between pointers
 129 ;; ---- Histogram processing
 130 ;; ---- String matching
 131 ;; ---- Table lookup
 132 ;;
 133 ;; == Cryptographic extensions
 134 ;; ---- Optional AES extensions
 135 ;; ---- Optional SHA-3 extensions
 136 ;; ---- Optional SM4 extensions
 137
 138 ;; =========================================================================
 139 ;; == Moves
 140 ;; =========================================================================
 141
 142 ;; -------------------------------------------------------------------------
 143 ;; ---- Predicate to vector moves
 144 ;; -------------------------------------------------------------------------
 145 ;; Includes:
 146 ;; - PMOV (to vector) (SVE2p1)
 147 ;; -------------------------------------------------------------------------
 148
 149 (define_insn "@aarch64_pmov_to_<mode>"
 150   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
 151         (unspec:SVE_FULL_I
 152           [(match_operand:<VPRED> 1 "register_operand" "Upa")]
 153           UNSPEC_PMOV_UNPACK))]
 154   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 155   "pmov\t%0, %1.<Vetype>"
 156 )
 157
 158 (define_insn "@aarch64_pmov_lane_to_<mode>"
 159   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
 160         (unspec:SVE_FULL_I
 161           [(match_operand:SVE_FULL_I 1 "register_operand" "0")
 162            (match_operand:<VPRED> 2 "register_operand" "Upa")
 163            (match_operand:DI 3 "immediate_operand")]
 164           UNSPEC_PMOV_UNPACK_LANE))]
 165   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 166   "pmov\t%0[%3], %2.<Vetype>"
 167 )
 168
 169 ;; -------------------------------------------------------------------------
 170 ;; ---- Vector to predicate moves
 171 ;; -------------------------------------------------------------------------
 172 ;; Includes:
 173 ;; - PMOV (from vector) (SVE2p1)
 174 ;; -------------------------------------------------------------------------
 175
 176 (define_insn "@aarch64_pmov_from_<mode>"
 177   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 178         (unspec:VNx16BI
 179           [(match_operand:SVE_FULL_I 1 "register_operand" "w")]
 180           UNSPEC_PMOV_PACK))]
 181   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 182   "pmov\t%0.<Vetype>, %1"
 183 )
 184
 185 (define_insn "@aarch64_pmov_lane_from_<mode>"
 186   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 187         (unspec:VNx16BI
 188           [(match_operand:SVE_FULL_I 1 "register_operand" "w")
 189            (match_operand:DI 2 "immediate_operand")]
 190           UNSPEC_PMOV_PACK_LANE))]
 191   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 192   "pmov\t%0.<Vetype>, %1[%2]"
 193 )
 194
 195 ;; =========================================================================
 196 ;; == Loads
 197 ;; =========================================================================
 198
 199 ;; -------------------------------------------------------------------------
 200 ;; ---- 128-bit extending loads
 201 ;; -------------------------------------------------------------------------
 202 ;; Includes:
 203 ;; - LD1W (to .Q) (SVE2p1)
 204 ;; - LD1D (to .Q) (SVE2p1)
 205 ;; -------------------------------------------------------------------------
 206
 207 ;; There isn't really a natural way of representing these instructions
 208 ;; with the modes that we normally use:
 209 ;;
 210 ;; (1) It doesn't really make sense to use VNx1TI (or similar) for the
 211 ;;     result, since there's nothing that can be done with such a mode
 212 ;;     other than to cast it to another mode.  It also isn't how the
 213 ;;     ACLE represents it (for similar reasons).
 214 ;;
 215 ;; (2) Only the lowest bit of each 16 in the predicate is significant,
 216 ;;     but it doesn't really make sense to use VNx1BI to represent it,
 217 ;;     since there is no "PTRUE Pn.Q, ..." instruction.
 218 ;;
 219 ;; (3) We do however need to use VNx1DI and VNx1SI to represent the
 220 ;;     source memories, since none of the normal register modes would
 221 ;;     give the right extent and alignment information (with the alignment
 222 ;;     mattering only for -mstrict-align).
 223 (define_insn "@aarch64_sve_ld1_extendq<mode>"
 224   [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
 225         (unspec:SVE_FULL_SD
 226           [(match_operand:<VPRED> 2 "register_operand" "Upl")
 227            (match_operand:<LD1_EXTENDQ_MEM> 1 "memory_operand" "m")]
 228           UNSPEC_LD1_EXTENDQ))]
 229   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 230   "ld1<Vesize>\t{%0.q}, %2/z, %1"
 231 )
 232
 233 ;; -------------------------------------------------------------------------
 234 ;; ---- 128-bit structure loads
 235 ;; -------------------------------------------------------------------------
 236 ;; Includes:
 237 ;; - LD2Q (SVE2p1)
 238 ;; - LD3Q (SVE2p1)
 239 ;; - LD4Q (SVE2p1)
 240 ;; -------------------------------------------------------------------------
 241
 242 ;; Predicated LD[234]Q.
 243 (define_insn "@aarch64_sve_ldnq<mode>"
 244   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
 245         (unspec:SVE_STRUCT
 246           [(match_operand:<VPRED> 2 "register_operand" "Upl")
 247            (match_operand:<VNxTI> 1 "memory_operand" "m")]
 248           UNSPEC_LDNQ))]
 249   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 250   "ld<vector_count>q\t{%S0.q - %<Vendreg>0.q}, %2/z, %1"
 251 )
 252
 253 ;; -------------------------------------------------------------------------
 254 ;; ---- Multi-register loads predicated by a counter
 255 ;; -------------------------------------------------------------------------
 256 ;; Includes:
 257 ;; - LD1B
 258 ;; - LD1D
 259 ;; - LD1H
 260 ;; - LD1W
 261 ;; - LDNT1B
 262 ;; - LDNT1D
 263 ;; - LDNT1H
 264 ;; - LDNT1W
 265 ;; -------------------------------------------------------------------------
 266
 267 ;; Predicated LD1 (multi), with a count as predicate.
 268 (define_insn "@aarch64_<optab><mode>"
 269   [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
 270         (unspec:SVE_FULLx24
 271           [(match_operand:VNx16BI 2 "register_operand" "Uph")
 272            (match_operand:SVE_FULLx24 1 "memory_operand" "m")
 273            (match_operand:SVE_FULLx24 3 "aarch64_maskload_else_operand")]
 274           LD1_COUNT))]
 275   "TARGET_SVE2p1_OR_SME2"
 276   "<optab><Vesize>\t%0, %K2/z, %1"
 277   [(set_attr "stride_type" "ld1_consecutive")]
 278 )
 279
 280 (define_insn "@aarch64_<optab><mode>_strided2"
 281   [(set (match_operand:<VSINGLE> 0 "aarch64_simd_register" "=Uwd")
 282         (unspec:<VSINGLE>
 283           [(match_operand:VNx16BI 3 "register_operand" "Uph")
 284            (match_operand:SVE_FULLx2 2 "memory_operand" "m")
 285            (const_int 0)]
 286           LD1_COUNT))
 287    (set (match_operand:<VSINGLE> 1 "aarch64_simd_register" "=w")
 288         (unspec:<VSINGLE>
 289           [(match_dup 3)
 290            (match_dup 2)
 291            (const_int 1)]
 292           LD1_COUNT))]
 293   "TARGET_STREAMING_SME2
 294    && aarch64_strided_registers_p (operands, 2, 8)"
 295   "<optab><Vesize>\t{%0.<Vetype>, %1.<Vetype>}, %K3/z, %2"
 296   [(set_attr "stride_type" "ld1_strided")]
 297 )
 298
 299 (define_insn "@aarch64_<optab><mode>_strided4"
 300   [(set (match_operand:<VSINGLE> 0 "aarch64_simd_register" "=Uwt")
 301         (unspec:<VSINGLE>
 302           [(match_operand:VNx16BI 5 "register_operand" "Uph")
 303            (match_operand:SVE_FULLx4 4 "memory_operand" "m")
 304            (const_int 0)]
 305           LD1_COUNT))
 306    (set (match_operand:<VSINGLE> 1 "aarch64_simd_register" "=w")
 307         (unspec:<VSINGLE>
 308           [(match_dup 5)
 309            (match_dup 4)
 310            (const_int 1)]
 311           LD1_COUNT))
 312    (set (match_operand:<VSINGLE> 2 "aarch64_simd_register" "=w")
 313         (unspec:<VSINGLE>
 314           [(match_dup 5)
 315            (match_dup 4)
 316            (const_int 2)]
 317           LD1_COUNT))
 318    (set (match_operand:<VSINGLE> 3 "aarch64_simd_register" "=w")
 319         (unspec:<VSINGLE>
 320           [(match_dup 5)
 321            (match_dup 4)
 322            (const_int 3)]
 323           LD1_COUNT))]
 324   "TARGET_STREAMING_SME2
 325    && aarch64_strided_registers_p (operands, 4, 4)"
 326   "<optab><Vesize>\t{%0.<Vetype>, %1.<Vetype>, %2.<Vetype>, %3.<Vetype>}, %K5/z, %4"
 327   [(set_attr "stride_type" "ld1_strided")]
 328 )
 329
 330 ;; -------------------------------------------------------------------------
 331 ;; ---- 128-bit gather loads
 332 ;; -------------------------------------------------------------------------
 333 ;; Includes gather forms of:
 334 ;; - LD1Q (SVE2p1)
 335 ;; -------------------------------------------------------------------------
 336
 337 ;; Model this as operating on the largest valid element size, which is DI.
 338 ;; This avoids having to define move patterns & more for VNx1TI, which would
 339 ;; be difficult without a non-gather form of LD1Q.
 340 (define_insn "aarch64_gather_ld1q"
 341   [(set (match_operand:VNx2DI 0 "register_operand")
 342         (unspec:VNx2DI
 343           [(match_operand:VNx2BI 1 "register_operand")
 344            (match_operand:DI 2 "aarch64_reg_or_zero")
 345            (match_operand:VNx2DI 3 "register_operand")
 346            (mem:BLK (scratch))]
 347           UNSPEC_LD1_GATHER))]
 348   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 349   {@ [cons: =0, 1, 2, 3]
 350      [&w, Upl, Z, w] ld1q\t{%0.q}, %1/z, [%3.d]
 351      [?w, Upl, Z, 0] ^
 352      [&w, Upl, r, w] ld1q\t{%0.q}, %1/z, [%3.d, %2]
 353      [?w, Upl, r, 0] ^
 354   }
 355 )
 356
 357 ;; -------------------------------------------------------------------------
 358 ;; ---- Non-temporal gather loads
 359 ;; -------------------------------------------------------------------------
 360 ;; Includes gather forms of:
 361 ;; - LDNT1B
 362 ;; - LDNT1D
 363 ;; - LDNT1H
 364 ;; - LDNT1W
 365 ;; -------------------------------------------------------------------------
 366
 367 ;; Non-extending loads.
 368 (define_insn "@aarch64_gather_ldnt<mode>"
 369   [(set (match_operand:SVE_FULL_SD 0 "register_operand")
 370         (unspec:SVE_FULL_SD
 371           [(match_operand:<VPRED> 1 "register_operand")
 372            (match_operand:DI 2 "aarch64_reg_or_zero")
 373            (match_operand:<V_INT_EQUIV> 3 "register_operand")
 374            (mem:BLK (scratch))]
 375           UNSPEC_LDNT1_GATHER))]
 376   "TARGET_SVE2 && TARGET_NON_STREAMING"
 377   {@ [cons: =0, 1, 2, 3]
 378      [&w, Upl, Z, w    ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
 379      [?w, Upl, Z, 0    ] ^
 380      [&w, Upl, r, w    ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]
 381      [?w, Upl, r, 0    ] ^
 382   }
 383 )
 384
 385 ;; Extending loads.
 386 (define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
 387   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
 388         (unspec:SVE_FULL_SDI
 389           [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand")
 390            (ANY_EXTEND:SVE_FULL_SDI
 391              (unspec:SVE_PARTIAL_I
 392                [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand")
 393                 (match_operand:DI 2 "aarch64_reg_or_zero")
 394                 (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand")
 395                 (mem:BLK (scratch))]
 396                UNSPEC_LDNT1_GATHER))]
 397           UNSPEC_PRED_X))]
 398   "TARGET_SVE2
 399    && TARGET_NON_STREAMING
 400    && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
 401   {@ [cons: =0, 1, 2, 3, 4]
 402      [&w, Upl, Z, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
 403      [?w, Upl, Z, 0, UplDnm] ^
 404      [&w, Upl, r, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]
 405      [?w, Upl, r, 0, UplDnm] ^
 406   }
 407   "&& !CONSTANT_P (operands[4])"
 408   {
 409     operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
 410   }
 411 )
 412
 413 ;; =========================================================================
 414 ;; == Stores
 415 ;; =========================================================================
 416
 417 ;; -------------------------------------------------------------------------
 418 ;; ---- 128-bit truncating stores
 419 ;; -------------------------------------------------------------------------
 420 ;; Includes:
 421 ;; - ST1W (from .Q) (SVE2p1)
 422 ;; - ST1D (from .Q) (SVE2p1)
 423 ;; -------------------------------------------------------------------------
 424
 425 ;; See the comment above the corresponding loads for a discussion about the
 426 ;; choice of modes.
 427 (define_insn "@aarch64_sve_st1_truncq<mode>"
 428   [(set (match_operand:<LD1_EXTENDQ_MEM> 0 "memory_operand" "+m")
 429         (unspec:<LD1_EXTENDQ_MEM>
 430           [(match_operand:<VPRED> 2 "register_operand" "Upl")
 431            (match_operand:SVE_FULL_SD 1 "register_operand" "w")
 432            (match_dup 0)]
 433           UNSPEC_ST1_TRUNCQ))]
 434   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 435   "st1<Vesize>\t{%1.q}, %2, %0"
 436 )
 437
 438 ;; -------------------------------------------------------------------------
 439 ;; ---- 128-bit structure stores
 440 ;; -------------------------------------------------------------------------
 441 ;; Includes:
 442 ;; - ST2Q (SVE2p1)
 443 ;; - ST3Q (SVE2p1)
 444 ;; - ST4Q (SVE2p1)
 445 ;; -------------------------------------------------------------------------
 446
 447 ;; Predicated ST[234].
 448 (define_insn "@aarch64_sve_stnq<mode>"
 449   [(set (match_operand:<VNxTI> 0 "memory_operand" "+m")
 450         (unspec:<VNxTI>
 451           [(match_operand:<VPRED> 2 "register_operand" "Upl")
 452            (match_operand:SVE_STRUCT 1 "register_operand" "w")
 453            (match_dup 0)]
 454           UNSPEC_STNQ))]
 455   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 456   "st<vector_count>q\t{%S1.q - %<Vendreg>1.q}, %2, %0"
 457 )
 458
 459 ;; -------------------------------------------------------------------------
 460 ;; ---- Multi-register stores predicated by a counter
 461 ;; -------------------------------------------------------------------------
 462 ;; Includes:
 463 ;; - ST1B
 464 ;; - ST1D
 465 ;; - ST1H
 466 ;; - ST1W
 467 ;; - STNT1B
 468 ;; - STNT1D
 469 ;; - STNT1H
 470 ;; - STNT1W
 471 ;; -------------------------------------------------------------------------
 472
 473 (define_insn "@aarch64_<optab><mode>"
 474   [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
 475         (unspec:SVE_FULLx24
 476           [(match_operand:VNx16BI 2 "register_operand" "Uph")
 477            (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
 478            (match_dup 0)]
 479           ST1_COUNT))]
 480   "TARGET_SVE2p1_OR_SME2"
 481   "<optab><Vesize>\t%1, %K2, %0"
 482   [(set_attr "stride_type" "st1_consecutive")]
 483 )
 484
 485 (define_insn "@aarch64_<optab><mode>_strided2"
 486   [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
 487         (unspec:SVE_FULLx24
 488           [(match_operand:VNx16BI 1 "register_operand" "Uph")
 489            (match_operand:<VSINGLE> 2 "aarch64_simd_register" "Uwd")
 490            (match_operand:<VSINGLE> 3 "aarch64_simd_register" "w")
 491            (match_dup 0)]
 492           ST1_COUNT))]
 493   "TARGET_STREAMING_SME2
 494    && aarch64_strided_registers_p (operands + 2, 2, 8)"
 495   "<optab><Vesize>\t{%2.<Vetype>, %3.<Vetype>}, %K1, %0"
 496   [(set_attr "stride_type" "st1_strided")]
 497 )
 498
 499 (define_insn "@aarch64_<optab><mode>_strided4"
 500   [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
 501         (unspec:SVE_FULLx24
 502           [(match_operand:VNx16BI 1 "register_operand" "Uph")
 503            (match_operand:<VSINGLE> 2 "aarch64_simd_register" "Uwt")
 504            (match_operand:<VSINGLE> 3 "aarch64_simd_register" "w")
 505            (match_operand:<VSINGLE> 4 "aarch64_simd_register" "w")
 506            (match_operand:<VSINGLE> 5 "aarch64_simd_register" "w")
 507            (match_dup 0)]
 508           ST1_COUNT))]
 509   "TARGET_STREAMING_SME2
 510    && aarch64_strided_registers_p (operands + 2, 4, 4)"
 511   "<optab><Vesize>\t{%2.<Vetype>, %3.<Vetype>, %4.<Vetype>, %5.<Vetype>}, %K1, %0"
 512   [(set_attr "stride_type" "st1_strided")]
 513 )
 514
 515 ;; -------------------------------------------------------------------------
 516 ;; ---- 128-bit scatter stores
 517 ;; -------------------------------------------------------------------------
 518 ;; Includes scatter form of:
 519 ;; - ST1Q (SVE2p1)
 520 ;; -------------------------------------------------------------------------
 521
 522 (define_insn "aarch64_scatter_st1q"
 523   [(set (mem:BLK (scratch))
 524         (unspec:BLK
 525           [(match_operand:VNx2BI 0 "register_operand")
 526            (match_operand:DI 1 "aarch64_reg_or_zero")
 527            (match_operand:VNx2DI 2 "register_operand")
 528            (match_operand:VNx2DI 3 "register_operand")]
 529           UNSPEC_ST1Q_SCATTER))]
 530   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
 531   {@ [ cons: 0 , 1 , 2 , 3  ]
 532      [ Upl     , Z , w , w  ] st1q\t{%3.q}, %0, [%2.d]
 533      [ Upl     , r , w , w  ] st1q\t{%3.q}, %0, [%2.d, %1]
 534   }
 535 )
 536
 537 ;; -------------------------------------------------------------------------
 538 ;; ---- Non-temporal scatter stores
 539 ;; -------------------------------------------------------------------------
 540 ;; Includes scatter forms of:
 541 ;; - STNT1B
 542 ;; - STNT1D
 543 ;; - STNT1H
 544 ;; - STNT1W
 545 ;; -------------------------------------------------------------------------
 546
 547 ;; Non-truncating stores.
 548 (define_insn "@aarch64_scatter_stnt<mode>"
 549   [(set (mem:BLK (scratch))
 550         (unspec:BLK
 551           [(match_operand:<VPRED> 0 "register_operand")
 552            (match_operand:DI 1 "aarch64_reg_or_zero")
 553            (match_operand:<V_INT_EQUIV> 2 "register_operand")
 554            (match_operand:SVE_FULL_SD 3 "register_operand")]
 555
 556           UNSPEC_STNT1_SCATTER))]
 557   "TARGET_SVE && TARGET_NON_STREAMING"
 558   {@ [ cons: 0 , 1 , 2 , 3  ]
 559      [ Upl     , Z , w , w  ] stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
 560      [ Upl     , r , w , w  ] stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]
 561   }
 562 )
 563
 564 ;; Truncating stores.
 565 (define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
 566   [(set (mem:BLK (scratch))
 567         (unspec:BLK
 568           [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand")
 569            (match_operand:DI 1 "aarch64_reg_or_zero")
 570            (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand")
 571            (truncate:SVE_PARTIAL_I
 572              (match_operand:SVE_FULL_SDI 3 "register_operand"))]
 573           UNSPEC_STNT1_SCATTER))]
 574   "TARGET_SVE2
 575    && TARGET_NON_STREAMING
 576    && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
 577   {@ [ cons: 0 , 1 , 2 , 3  ]
 578      [ Upl     , Z , w , w  ] stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
 579      [ Upl     , r , w , w  ] stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]
 580   }
 581 )
 582
 583 ;; =========================================================================
 584 ;; == Predicate manipulation
 585 ;; =========================================================================
 586
 587 ;; -------------------------------------------------------------------------
 588 ;; ---- [PRED] Predicate-as-counter PTRUE
 589 ;; -------------------------------------------------------------------------
 590 ;; - PTRUE (predicate-as-counter form)
 591 ;; -------------------------------------------------------------------------
 592
 593 (define_insn "@aarch64_sve_ptrue_c<BHSD_BITS>"
 594   [(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
 595         (unspec:VNx16BI [(const_int BHSD_BITS)] UNSPEC_PTRUE_C))]
 596   "TARGET_SVE2p1_OR_SME2"
 597   "ptrue\t%K0.<bits_etype>"
 598 )
 599
 600 ;; -------------------------------------------------------------------------
 601 ;; ---- [PRED] Predicate extraction
 602 ;; -------------------------------------------------------------------------
 603 ;; Includes
 604 ;; - PEXT
 605 ;; -------------------------------------------------------------------------
 606
 607 (define_insn "@aarch64_sve_pext<BHSD_BITS>"
 608   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 609         (unspec:VNx16BI
 610           [(match_operand:VNx16BI 1 "register_operand" "Uph")
 611            (match_operand:DI 2 "const_int_operand")
 612            (const_int BHSD_BITS)]
 613           UNSPEC_PEXT))]
 614   "TARGET_SVE2p1_OR_SME2"
 615   "pext\t%0.<bits_etype>, %K1[%2]"
 616 )
 617
 618 (define_insn "@aarch64_sve_pext<BHSD_BITS>x2"
 619   [(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
 620         (unspec:VNx32BI
 621           [(match_operand:VNx16BI 1 "register_operand" "Uph")
 622            (match_operand:DI 2 "const_int_operand")
 623            (const_int BHSD_BITS)]
 624           UNSPEC_PEXTx2))]
 625   "TARGET_SVE2p1_OR_SME2"
 626   "pext\t{%S0.<bits_etype>, %T0.<bits_etype>}, %K1[%2]"
 627 )
 628
 629 ;; -------------------------------------------------------------------------
 630 ;; ---- [PRED] Predicate selection
 631 ;; -------------------------------------------------------------------------
 632 ;; Includes
 633 ;; - PSEL
 634 ;; -------------------------------------------------------------------------
 635
 636 (define_insn "@aarch64_sve_psel<BHSD_BITS>"
 637   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 638         (unspec:VNx16BI
 639           [(match_operand:VNx16BI 1 "register_operand" "Upa")
 640            (match_operand:VNx16BI 2 "register_operand" "Upa")
 641            (match_operand:SI 3 "register_operand" "Ucj")
 642            (const_int BHSD_BITS)]
 643           UNSPEC_PSEL))]
 644   "TARGET_SVE2p1_OR_SME"
 645   "psel\t%0, %1, %2.<bits_etype>[%w3, 0]"
 646 )
 647
 648 (define_insn "*aarch64_sve_psel<BHSD_BITS>_plus"
 649   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 650         (unspec:VNx16BI
 651           [(match_operand:VNx16BI 1 "register_operand" "Upa")
 652            (match_operand:VNx16BI 2 "register_operand" "Upa")
 653            (plus:SI
 654              (match_operand:SI 3 "register_operand" "Ucj")
 655              (match_operand:SI 4 "const_int_operand"))
 656            (const_int BHSD_BITS)]
 657           UNSPEC_PSEL))]
 658   "TARGET_SVE2p1_OR_SME
 659    && UINTVAL (operands[4]) < 128 / <BHSD_BITS>"
 660   "psel\t%0, %1, %2.<bits_etype>[%w3, %4]"
 661 )
 662
 663 ;; -------------------------------------------------------------------------
 664 ;; ---- [PRED] Predicate count
 665 ;; -------------------------------------------------------------------------
 666 ;; Includes
 667 ;; - CNTP (predicate as counter)
 668 ;; -------------------------------------------------------------------------
 669
 670 (define_insn "@aarch64_sve_cntp_c<BHSD_BITS>"
 671   [(set (match_operand:DI 0 "register_operand" "=r")
 672         (unspec:DI
 673           [(match_operand:VNx16BI 1 "register_operand" "Upa")
 674            (match_operand:DI 2 "const_int_operand")
 675            (const_int BHSD_BITS)]
 676           UNSPEC_CNTP_C))]
 677   "TARGET_SVE2p1_OR_SME2"
 678   "cntp\t%x0, %K1.<bits_etype>, vlx%2"
 679 )
 680
 681 ;; =========================================================================
 682 ;; == Uniform unary arithmnetic
 683 ;; =========================================================================
 684
 685 ;; -------------------------------------------------------------------------
 686 ;; ---- [FP] Multi-register unary operations
 687 ;; -------------------------------------------------------------------------
 688 ;; Includes:
 689 ;; - FRINTA
 690 ;; - FRINTM
 691 ;; - FRINTN
 692 ;; - FRINTP
 693 ;; -------------------------------------------------------------------------
 694
 695 (define_insn "<frint_pattern><mode>2"
 696   [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
 697         (unspec:SVE_SFx24
 698           [(match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")]
 699           SVE2_SFx24_UNARY))]
 700   "TARGET_STREAMING_SME2"
 701   "frint<frint_suffix>\t%0, %1"
 702 )
 703
 704 ;; =========================================================================
 705 ;; == Uniform binary arithmnetic
 706 ;; =========================================================================
 707
 708 ;; -------------------------------------------------------------------------
 709 ;; ---- [INT] Multi-register operations
 710 ;; -------------------------------------------------------------------------
 711 ;; Includes the multi-register forms of:
 712 ;; - ADD
 713 ;; - SMAX
 714 ;; - SMIN
 715 ;; - SQMULH
 716 ;; - SRSHL
 717 ;; - UMAX
 718 ;; - UMIN
 719 ;; - URSHL
 720 ;; -------------------------------------------------------------------------
 721
 722 (define_expand "<optab><mode>3"
 723   [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
 724         (SVE_INT_BINARY_MULTI:SVE_Ix24
 725           (match_operand:SVE_Ix24 1 "aligned_register_operand" "Uw<vector_count>")
 726           (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
 727   "TARGET_STREAMING_SME2"
 728 )
 729
 730 (define_insn "*<optab><mode>3"
 731   [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
 732         (SVE_INT_BINARY_MULTI:SVE_Ix24
 733           (match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
 734           (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
 735   "TARGET_STREAMING_SME2"
 736   "<sve_int_op>\t%0, %0, %2"
 737 )
 738
 739 (define_insn "@aarch64_sve_single_<optab><mode>"
 740   [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
 741         (SVE_INT_BINARY_SINGLE:SVE_Ix24
 742           (match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
 743           (vec_duplicate:SVE_Ix24
 744             (match_operand:<VSINGLE> 2 "register_operand" "x"))))]
 745   "TARGET_STREAMING_SME2"
 746   "<sve_int_op>\t%0, %0, %2.<Vetype>"
 747 )
 748
 749 (define_insn "@aarch64_sve_<sve_int_op><mode>"
 750   [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
 751         (unspec:SVE_Ix24
 752           [(match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
 753            (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")]
 754           SVE_INT_BINARY_MULTI))]
 755   "TARGET_STREAMING_SME2"
 756   "<sve_int_op>\t%0, %0, %2"
 757 )
 758
 759 (define_insn "@aarch64_sve_single_<sve_int_op><mode>"
 760   [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
 761         (unspec:SVE_Ix24
 762           [(match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
 763            (vec_duplicate:SVE_Ix24
 764              (match_operand:<VSINGLE> 2 "register_operand" "x"))]
 765           SVE_INT_BINARY_MULTI))]
 766   "TARGET_STREAMING_SME2"
 767   "<sve_int_op>\t%0, %0, %2.<Vetype>"
 768 )
 769
 770 ;; -------------------------------------------------------------------------
 771 ;; ---- [INT] Clamp to minimum/maximum
 772 ;; -------------------------------------------------------------------------
 773 ;; - SCLAMP
 774 ;; - UCLAMP
 775 ;; -------------------------------------------------------------------------
 776
 777 ;; The minimum is applied after the maximum, which matters if the maximum
 778 ;; bound is (unexpectedly) less than the minimum bound.
 779 (define_insn "@aarch64_sve_<su>clamp<mode>"
 780   [(set (match_operand:SVE_FULL_I 0 "register_operand")
 781         (<max_opp>:SVE_FULL_I
 782           (USMAX:SVE_FULL_I
 783             (match_operand:SVE_FULL_I 1 "register_operand")
 784             (match_operand:SVE_FULL_I 2 "register_operand"))
 785           (match_operand:SVE_FULL_I 3 "register_operand")))]
 786   "TARGET_SVE2p1_OR_SME"
 787   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
 788      [       w, %0, w, w; *             ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
 789      [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
 790   }
 791 )
 792
 793 (define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x"
 794   [(set (match_operand:SVE_FULL_I 0 "register_operand")
 795         (unspec:SVE_FULL_I
 796           [(match_operand 4)
 797            (<max_opp>:SVE_FULL_I
 798              (unspec:SVE_FULL_I
 799                [(match_operand 5)
 800                 (USMAX:SVE_FULL_I
 801                   (match_operand:SVE_FULL_I 1 "register_operand")
 802                   (match_operand:SVE_FULL_I 2 "register_operand"))]
 803                UNSPEC_PRED_X)
 804              (match_operand:SVE_FULL_I 3 "register_operand"))]
 805           UNSPEC_PRED_X))]
 806   "TARGET_SVE2p1_OR_SME"
 807   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
 808      [       w, %0, w, w; *             ] #
 809      [     ?&w,  w, w, w; yes           ] #
 810   }
 811   "&& true"
 812   [(set (match_dup 0)
 813         (<max_opp>:SVE_FULL_I
 814           (USMAX:SVE_FULL_I
 815             (match_dup 1)
 816             (match_dup 2))
 817           (match_dup 3)))]
 818 )
 819
 820 (define_insn "@aarch64_sve_<su>clamp_single<mode>"
 821   [(set (match_operand:SVE_Ix24 0 "register_operand" "=Uw<vector_count>")
 822         (<max_opp>:SVE_Ix24
 823           (USMAX:SVE_Ix24
 824             (match_operand:SVE_Ix24 1 "register_operand" "0")
 825             (vec_duplicate:SVE_Ix24
 826               (match_operand:<VSINGLE> 2 "register_operand" "w")))
 827           (vec_duplicate:SVE_Ix24
 828             (match_operand:<VSINGLE> 3 "register_operand" "w"))))]
 829   "TARGET_STREAMING_SME2"
 830   "<su>clamp\t%0, %2.<Vetype>, %3.<Vetype>"
 831 )
 832
 833 ;; -------------------------------------------------------------------------
 834 ;; ---- [INT] Multiplication
 835 ;; -------------------------------------------------------------------------
 836 ;; Includes the lane and unpredicated forms of:
 837 ;; - MUL
 838 ;; -------------------------------------------------------------------------
 839
 840 (define_insn "@aarch64_mul_lane_<mode>"
 841   [(set (match_operand:SVE_FULL_HSDI_SIMD_DI 0 "register_operand" "=w")
 842         (mult:SVE_FULL_HSDI_SIMD_DI
 843           (unspec:SVE_FULL_HSDI_SIMD_DI
 844             [(match_operand:SVE_FULL_HSDI_SIMD_DI 2 "register_operand" "<sve_lane_con>")
 845              (match_operand:SI 3 "const_int_operand")]
 846             UNSPEC_SVE_LANE_SELECT)
 847           (match_operand:SVE_FULL_HSDI_SIMD_DI 1 "register_operand" "w")))]
 848   "TARGET_SVE2"
 849   "mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>[%3]"
 850 )
 851
 852 ;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but
 853 ;; we include them here to allow matching simpler, unpredicated RTL.
 854 (define_insn "*aarch64_mul_unpredicated_<mode>"
 855   [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
 856         (mult:SVE_I_SIMD_DI
 857           (match_operand:SVE_I_SIMD_DI 1 "register_operand")
 858           (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand")))]
 859   "TARGET_SVE2"
 860   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
 861      [ w        , w , w   ; *              ] mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>
 862      [ w        , 0 , vsm ; *              ] mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
 863      [ ?&w      , w , vsm ; yes            ] movprfx\t%Z0, %Z1\;mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
 864   }
 865 )
 866
 867 ;; -------------------------------------------------------------------------
 868 ;; ---- [INT] Scaled high-part multiplication
 869 ;; -------------------------------------------------------------------------
 870 ;; The patterns in this section are synthetic.
 871 ;; -------------------------------------------------------------------------
 872
 873 ;; Unpredicated integer multiply-high-with-(round-and-)scale.
 874 (define_expand "<su>mulh<r>s<mode>3"
 875   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
 876         (unspec:SVE_FULL_BHSI
 877           [(match_dup 3)
 878            (unspec:SVE_FULL_BHSI
 879              [(match_operand:SVE_FULL_BHSI 1 "register_operand")
 880               (match_operand:SVE_FULL_BHSI 2 "register_operand")]
 881              MULHRS)]
 882           UNSPEC_PRED_X))]
 883   "TARGET_SVE2"
 884   {
 885     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
 886
 887     rtx prod_b = gen_reg_rtx (<VWIDE>mode);
 888     rtx prod_t = gen_reg_rtx (<VWIDE>mode);
 889     emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
 890                                                  operands[2]));
 891     emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
 892                                                  operands[2]));
 893
 894     rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
 895     emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
 896     emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
 897                                                 prod_t, shift));
 898
 899     DONE;
 900   }
 901 )
 902
 903 ;; -------------------------------------------------------------------------
 904 ;; ---- [INT] General binary arithmetic that maps to unspecs
 905 ;; -------------------------------------------------------------------------
 906 ;; Includes:
 907 ;; - SHADD
 908 ;; - SHSUB
 909 ;; - SHSUBR
 910 ;; - SQRSHL
 911 ;; - SQRSHLR
 912 ;; - SRHADD
 913 ;; - SRSHL
 914 ;; - SRSHLR
 915 ;; - SUQADD
 916 ;; - UHADD
 917 ;; - UHSUB
 918 ;; - UHSUBR
 919 ;; - UQRSHL
 920 ;; - UQRSHLR
 921 ;; - URHADD
 922 ;; - URSHL
 923 ;; - URSHLR
 924 ;; - USQADD
 925 ;; -------------------------------------------------------------------------
 926
 927 ;; Integer average (floor).
 928 (define_expand "<u>avg<mode>3_floor"
 929   [(set (match_operand:SVE_FULL_I 0 "register_operand")
 930         (unspec:SVE_FULL_I
 931           [(match_dup 3)
 932            (unspec:SVE_FULL_I
 933              [(match_operand:SVE_FULL_I 1 "register_operand")
 934               (match_operand:SVE_FULL_I 2 "register_operand")]
 935              HADD)]
 936           UNSPEC_PRED_X))]
 937   "TARGET_SVE2"
 938   {
 939     operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
 940   }
 941 )
 942
 943 ;; Integer average (rounding).
 944 (define_expand "<u>avg<mode>3_ceil"
 945   [(set (match_operand:SVE_FULL_I 0 "register_operand")
 946         (unspec:SVE_FULL_I
 947           [(match_dup 3)
 948            (unspec:SVE_FULL_I
 949              [(match_operand:SVE_FULL_I 1 "register_operand")
 950               (match_operand:SVE_FULL_I 2 "register_operand")]
 951              RHADD)]
 952           UNSPEC_PRED_X))]
 953   "TARGET_SVE2"
 954   {
 955     operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
 956   }
 957 )
 958
 959 ;; The immediate form of SQADD acts as an immediate form of SUQADD
 960 ;; over its full range.  In contrast to the ss_plus pattern, we do
 961 ;; not need to treat byte immediates specially.  E.g.:
 962 ;;
 963 ;;      SQADD   Z0.B, Z0.B, #128
 964 ;;
 965 ;; is equivalent to:
 966 ;;
 967 ;;      MOV     Z1.B, #128
 968 ;;      SUQADD  Z0.B, P0/M, Z0.B, Z1.B
 969 ;;
 970 ;; even though it's not equivalent to:
 971 ;;
 972 ;;      MOV     Z1.B, #128
 973 ;;      SQADD   Z0.B, P0/M, Z0.B, Z1.B  // Saturating subtraction of 128
 974 (define_insn "@aarch64_sve_suqadd<mode>_const"
 975   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
 976         (unspec:SVE_FULL_I
 977           [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
 978            (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
 979           UNSPEC_SUQADD))]
 980   "TARGET_SVE2"
 981   "@
 982    sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
 983    movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
 984   [(set_attr "movprfx" "*,yes")]
 985 )
 986
 987 ;; General predicated binary arithmetic.  All operations handled here
 988 ;; are commutative or have a reversed form.
 989 (define_insn "@aarch64_pred_<sve_int_op><mode>"
 990   [(set (match_operand:SVE_FULL_I 0 "register_operand")
 991         (unspec:SVE_FULL_I
 992           [(match_operand:<VPRED> 1 "register_operand")
 993            (unspec:SVE_FULL_I
 994              [(match_operand:SVE_FULL_I 2 "register_operand")
 995               (match_operand:SVE_FULL_I 3 "register_operand")]
 996              SVE2_COND_INT_BINARY_REV)]
 997           UNSPEC_PRED_X))]
 998   "TARGET_SVE2"
 999   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
1000      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1001      [ w        , Upl , w , 0 ; *              ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1002      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1003   }
1004 )
1005
1006 ;; Predicated binary arithmetic with merging.
1007 (define_expand "@cond_<sve_int_op><mode>"
1008   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1009         (unspec:SVE_FULL_I
1010           [(match_operand:<VPRED> 1 "register_operand")
1011            (unspec:SVE_FULL_I
1012              [(match_dup 5)
1013               (unspec:SVE_FULL_I
1014                 [(match_operand:SVE_FULL_I 2 "register_operand")
1015                  (match_operand:SVE_FULL_I 3 "register_operand")]
1016                 SVE2_COND_INT_BINARY)]
1017              UNSPEC_PRED_X)
1018            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
1019           UNSPEC_SEL))]
1020   "TARGET_SVE2"
1021   {
1022     operands[5] = CONSTM1_RTX (<MODE>mode);
1023   }
1024 )
1025
1026 ;; Predicated binary arithmetic, merging with the first input.
1027 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
1028   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1029         (unspec:SVE_FULL_I
1030           [(match_operand:<VPRED> 1 "register_operand")
1031            (unspec:SVE_FULL_I
1032              [(match_operand 4)
1033               (unspec:SVE_FULL_I
1034                 [(match_operand:SVE_FULL_I 2 "register_operand")
1035                  (match_operand:SVE_FULL_I 3 "register_operand")]
1036                 SVE2_COND_INT_BINARY)]
1037              UNSPEC_PRED_X)
1038            (match_dup 2)]
1039           UNSPEC_SEL))]
1040   "TARGET_SVE2"
1041   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
1042      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1043      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1044   }
1045   "&& !CONSTANT_P (operands[4])"
1046   {
1047     operands[4] = CONSTM1_RTX (<VPRED>mode);
1048   }
1049 )
1050
1051 ;; Predicated binary arithmetic, merging with the second input.
1052 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
1053   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1054         (unspec:SVE_FULL_I
1055           [(match_operand:<VPRED> 1 "register_operand")
1056            (unspec:SVE_FULL_I
1057              [(match_operand 4)
1058               (unspec:SVE_FULL_I
1059                 [(match_operand:SVE_FULL_I 2 "register_operand")
1060                  (match_operand:SVE_FULL_I 3 "register_operand")]
1061                 SVE2_COND_INT_BINARY_REV)]
1062              UNSPEC_PRED_X)
1063            (match_dup 3)]
1064           UNSPEC_SEL))]
1065   "TARGET_SVE2"
1066   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
1067      [ w        , Upl , w , 0 ; *              ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1068      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1069   }
1070   "&& !CONSTANT_P (operands[4])"
1071   {
1072     operands[4] = CONSTM1_RTX (<VPRED>mode);
1073   }
1074 )
1075
1076 ;; Predicated binary operations, merging with an independent value.
1077 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
1078   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1079         (unspec:SVE_FULL_I
1080           [(match_operand:<VPRED> 1 "register_operand")
1081            (unspec:SVE_FULL_I
1082              [(match_operand 5)
1083               (unspec:SVE_FULL_I
1084                 [(match_operand:SVE_FULL_I 2 "register_operand")
1085                  (match_operand:SVE_FULL_I 3 "register_operand")]
1086                 SVE2_COND_INT_BINARY_REV)]
1087              UNSPEC_PRED_X)
1088            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
1089           UNSPEC_SEL))]
1090   "TARGET_SVE2
1091    && !rtx_equal_p (operands[2], operands[4])
1092    && !rtx_equal_p (operands[3], operands[4])"
1093   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
1094      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1095      [ &w       , Upl , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1096      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1097      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1098      [ ?&w      , Upl , w , w , w   ] #
1099   }
1100   "&& 1"
1101   {
1102     if (reload_completed
1103         && register_operand (operands[4], <MODE>mode)
1104         && !rtx_equal_p (operands[0], operands[4]))
1105       {
1106         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1107                                                  operands[4], operands[1]));
1108         operands[4] = operands[2] = operands[0];
1109       }
1110     else if (!CONSTANT_P (operands[5]))
1111       operands[5] = CONSTM1_RTX (<VPRED>mode);
1112     else
1113       FAIL;
1114   }
1115   [(set_attr "movprfx" "yes")]
1116 )
1117
1118 ;; Predicated binary operations with no reverse form, merging with zero.
1119 ;; At present we don't generate these patterns via a cond_* optab,
1120 ;; so there's no correctness requirement to handle merging with an
1121 ;; independent value.
1122 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
1123   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1124         (unspec:SVE_FULL_I
1125           [(match_operand:<VPRED> 1 "register_operand")
1126            (unspec:SVE_FULL_I
1127              [(match_operand 5)
1128               (unspec:SVE_FULL_I
1129                 [(match_operand:SVE_FULL_I 2 "register_operand")
1130                  (match_operand:SVE_FULL_I 3 "register_operand")]
1131                 SVE2_COND_INT_BINARY_NOREV)]
1132              UNSPEC_PRED_X)
1133            (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
1134           UNSPEC_SEL))]
1135   "TARGET_SVE2"
1136   {@ [ cons: =0 , 1   , 2 , 3  ]
1137      [ &w       , Upl , 0 , w  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1138      [ &w       , Upl , w , w  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1139   }
1140   "&& !CONSTANT_P (operands[5])"
1141   {
1142     operands[5] = CONSTM1_RTX (<VPRED>mode);
1143   }
1144   [(set_attr "movprfx" "yes")]
1145 )
1146
1147 ;; -------------------------------------------------------------------------
1148 ;; ---- [INT] Saturating binary arithmetic
1149 ;; -------------------------------------------------------------------------
1150 ;; Includes:
1151 ;; - SQDMULH
1152 ;; - SQRDMULH
1153 ;; -------------------------------------------------------------------------
1154
1155 (define_insn "@aarch64_sve_<sve_int_op><mode>"
1156   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
1157         (unspec:SVE_FULL_I
1158           [(match_operand:SVE_FULL_I 1 "register_operand" "w")
1159            (match_operand:SVE_FULL_I 2 "register_operand" "w")]
1160           SVE2_INT_BINARY))]
1161   "TARGET_SVE2"
1162   "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
1163 )
1164
1165 (define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
1166   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
1167         (unspec:SVE_FULL_HSDI
1168           [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
1169            (unspec:SVE_FULL_HSDI
1170              [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
1171               (match_operand:SI 3 "const_int_operand")]
1172              UNSPEC_SVE_LANE_SELECT)]
1173           SVE2_INT_BINARY_LANE))]
1174   "TARGET_SVE2"
1175   "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
1176 )
1177
1178 ;; -------------------------------------------------------------------------
1179 ;; ---- [INT] Saturating left shifts
1180 ;; -------------------------------------------------------------------------
1181 ;; Includes:
1182 ;; - SQSHL
1183 ;; - SQSHLR
1184 ;; - UQSHL
1185 ;; - UQSHLR
1186 ;; -------------------------------------------------------------------------
1187
1188 ;; Predicated left shifts.
1189 (define_insn "@aarch64_pred_<sve_int_op><mode>"
1190   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1191         (unspec:SVE_FULL_I
1192           [(match_operand:<VPRED> 1 "register_operand")
1193            (unspec:SVE_FULL_I
1194              [(match_operand:SVE_FULL_I 2 "register_operand")
1195               (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
1196              SVE2_COND_INT_SHIFT)]
1197           UNSPEC_PRED_X))]
1198   "TARGET_SVE2"
1199   {@ [ cons: =0 , 1   , 2 , 3     ; attrs: movprfx ]
1200      [ w        , Upl , 0 , D<lr> ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1201      [ w        , Upl , 0 , w     ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1202      [ w        , Upl , w , 0     ; *              ] <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1203      [ ?&w      , Upl , w , D<lr> ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1204      [ ?&w      , Upl , w , w     ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1205   }
1206 )
1207
1208 ;; Predicated left shifts with merging.
1209 (define_expand "@cond_<sve_int_op><mode>"
1210   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1211         (unspec:SVE_FULL_I
1212           [(match_operand:<VPRED> 1 "register_operand")
1213            (unspec:SVE_FULL_I
1214              [(match_dup 5)
1215               (unspec:SVE_FULL_I
1216                 [(match_operand:SVE_FULL_I 2 "register_operand")
1217                  (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
1218                 SVE2_COND_INT_SHIFT)]
1219              UNSPEC_PRED_X)
1220            (match_operand:SVE_FULL_I 4 "register_operand")]
1221           UNSPEC_SEL))]
1222   "TARGET_SVE2"
1223   {
1224     operands[5] = CONSTM1_RTX (<VPRED>mode);
1225   }
1226 )
1227
1228 ;; Predicated left shifts, merging with the first input.
1229 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
1230   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1231         (unspec:SVE_FULL_I
1232           [(match_operand:<VPRED> 1 "register_operand")
1233            (unspec:SVE_FULL_I
1234              [(match_operand 4)
1235               (unspec:SVE_FULL_I
1236                 [(match_operand:SVE_FULL_I 2 "register_operand")
1237                  (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
1238                 SVE2_COND_INT_SHIFT)]
1239              UNSPEC_PRED_X)
1240            (match_dup 2)]
1241           UNSPEC_SEL))]
1242   "TARGET_SVE2"
1243   {@ [ cons: =0 , 1   , 2 , 3     ; attrs: movprfx ]
1244      [ w        , Upl , 0 , D<lr> ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1245      [ w        , Upl , 0 , w     ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1246      [ ?&w      , Upl , w , D<lr> ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1247      [ ?&w      , Upl , w , w     ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1248   }
1249   "&& !CONSTANT_P (operands[4])"
1250   {
1251     operands[4] = CONSTM1_RTX (<VPRED>mode);
1252   }
1253 )
1254
1255 ;; Predicated left shifts, merging with the second input.
1256 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
1257   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1258         (unspec:SVE_FULL_I
1259           [(match_operand:<VPRED> 1 "register_operand")
1260            (unspec:SVE_FULL_I
1261              [(match_operand 4)
1262               (unspec:SVE_FULL_I
1263                 [(match_operand:SVE_FULL_I 2 "register_operand")
1264                  (match_operand:SVE_FULL_I 3 "register_operand")]
1265                 SVE2_COND_INT_SHIFT)]
1266              UNSPEC_PRED_X)
1267            (match_dup 3)]
1268           UNSPEC_SEL))]
1269   "TARGET_SVE2"
1270   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
1271      [ w        , Upl , w , 0 ; *              ] <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1272      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1273   }
1274   "&& !CONSTANT_P (operands[4])"
1275   {
1276     operands[4] = CONSTM1_RTX (<VPRED>mode);
1277   }
1278 )
1279
1280 ;; Predicated left shifts, merging with an independent value.
1281 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
1282   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1283         (unspec:SVE_FULL_I
1284           [(match_operand:<VPRED> 1 "register_operand")
1285            (unspec:SVE_FULL_I
1286              [(match_operand 5)
1287               (unspec:SVE_FULL_I
1288                 [(match_operand:SVE_FULL_I 2 "register_operand")
1289                  (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
1290                 SVE2_COND_INT_SHIFT)]
1291              UNSPEC_PRED_X)
1292            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
1293           UNSPEC_SEL))]
1294   "TARGET_SVE2
1295    && !rtx_equal_p (operands[2], operands[4])
1296    && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
1297   {@ [ cons: =0 , 1   , 2 , 3     , 4   ]
1298      [ &w       , Upl , 0 , D<lr> , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1299      [ &w       , Upl , 0 , w     , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1300      [ &w       , Upl , w , 0     , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1301      [ &w       , Upl , w , D<lr> , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1302      [ &w       , Upl , w , w     , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1303      [ &w       , Upl , w , D<lr> , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
1304      [ &w       , Upl , w , w     , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1305      [ ?&w      , Upl , w , D<lr> , w   ] #
1306      [ ?&w      , Upl , w , w     , w   ] #
1307   }
1308   "&& 1"
1309   {
1310     if (reload_completed
1311         && register_operand (operands[4], <MODE>mode)
1312         && !rtx_equal_p (operands[0], operands[4]))
1313       {
1314         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
1315                                                  operands[4], operands[1]));
1316         operands[4] = operands[2] = operands[0];
1317       }
1318     else if (!CONSTANT_P (operands[5]))
1319       operands[5] = CONSTM1_RTX (<VPRED>mode);
1320     else
1321       FAIL;
1322   }
1323   [(set_attr "movprfx" "yes")]
1324 )
1325
1326 ;; -------------------------------------------------------------------------
1327 ;; ---- [FP] Non-widening bfloat16 arithmetic
1328 ;; -------------------------------------------------------------------------
1329 ;; Includes:
1330 ;; - BFADD (SVE_B16B16)
1331 ;; - BFMAX (SVE_B16B16)
1332 ;; - BFMAXNM (SVE_B16B16)
1333 ;; - BFMIN (SVE_B16B16)
1334 ;; - BFMINNM (SVE_B16B16)
1335 ;; - BFMUL (SVE_B16B16)
1336 ;; -------------------------------------------------------------------------
1337
1338 ;; Predicated B16B16 binary operations.
1339 (define_insn "@aarch64_pred_<optab><mode>"
1340   [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
1341         (unspec:VNx8BF_ONLY
1342           [(match_operand:<VPRED> 1 "register_operand")
1343            (match_operand:SI 4 "aarch64_sve_gp_strictness")
1344            (match_operand:VNx8BF_ONLY 2 "register_operand")
1345            (match_operand:VNx8BF_ONLY 3 "register_operand")]
1346           SVE_COND_FP_BINARY_OPTAB))]
1347   "TARGET_SSVE_B16B16 && <supports_bf16>"
1348   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx , is_rev ]
1349      [ w        , Upl , 0 , w ; *    , *   ] <b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1350      [ w        , Upl , w , 0 ; *   , true ] <b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
1351      [ ?&w      , Upl , w , w ; yes , *    ] movprfx\t%0, %2\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
1352   }
1353   [(set_attr "is_bf16" "<is_bf16>")
1354    (set_attr "supports_bf16_rev" "<supports_bf16_rev>")]
1355 )
1356
1357 ;; -------------------------------------------------------------------------
1358 ;; ---- [FP] Clamp to minimum/maximum
1359 ;; -------------------------------------------------------------------------
1360 ;; - BFCLAMP (SVE_B16B16)
1361 ;; - FCLAMP
1362 ;; -------------------------------------------------------------------------
1363
1364 ;; The minimum is applied after the maximum, which matters if the maximum
1365 ;; bound is (unexpectedly) less than the minimum bound.
1366 (define_insn "@aarch64_sve_fclamp<mode>"
1367   [(set (match_operand:SVE_CLAMP_F 0 "register_operand")
1368         (unspec:SVE_CLAMP_F
1369           [(unspec:SVE_CLAMP_F
1370              [(match_operand:SVE_CLAMP_F 1 "register_operand")
1371               (match_operand:SVE_CLAMP_F 2 "register_operand")]
1372              UNSPEC_FMAXNM)
1373            (match_operand:SVE_CLAMP_F 3 "register_operand")]
1374           UNSPEC_FMINNM))]
1375   ""
1376   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
1377      [       w, %0, w, w; *             ] <b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
1378      [     ?&w,  w, w, w; yes           ] movprfx\t%0, %1\;<b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
1379   }
1380 )
1381
1382 (define_insn_and_split "*aarch64_sve_fclamp<mode>_x"
1383   [(set (match_operand:SVE_CLAMP_F 0 "register_operand")
1384         (unspec:SVE_CLAMP_F
1385           [(match_operand 4)
1386            (const_int SVE_RELAXED_GP)
1387            (unspec:SVE_CLAMP_F
1388              [(match_operand 5)
1389               (const_int SVE_RELAXED_GP)
1390               (match_operand:SVE_CLAMP_F 1 "register_operand")
1391               (match_operand:SVE_CLAMP_F 2 "register_operand")]
1392              UNSPEC_COND_FMAXNM)
1393            (match_operand:SVE_CLAMP_F 3 "register_operand")]
1394           UNSPEC_COND_FMINNM))]
1395   ""
1396   {@ [cons: =0,  1, 2, 3; attrs: movprfx]
1397      [       w, %0, w, w; *             ] #
1398      [     ?&w,  w, w, w; yes           ] #
1399   }
1400   "&& true"
1401   [(set (match_dup 0)
1402         (unspec:SVE_CLAMP_F
1403           [(unspec:SVE_CLAMP_F
1404              [(match_dup 1)
1405               (match_dup 2)]
1406              UNSPEC_FMAXNM)
1407            (match_dup 3)]
1408           UNSPEC_FMINNM))]
1409 )
1410
1411 (define_insn "@aarch64_sve_fclamp_single<mode>"
1412   [(set (match_operand:SVE_Fx24 0 "register_operand" "=Uw<vector_count>")
1413         (unspec:SVE_Fx24
1414           [(unspec:SVE_Fx24
1415              [(match_operand:SVE_Fx24 1 "register_operand" "0")
1416               (vec_duplicate:SVE_Fx24
1417                 (match_operand:<VSINGLE> 2 "register_operand" "w"))]
1418              UNSPEC_FMAXNM)
1419            (vec_duplicate:SVE_Fx24
1420              (match_operand:<VSINGLE> 3 "register_operand" "w"))]
1421           UNSPEC_FMINNM))]
1422   "TARGET_STREAMING_SME2"
1423   "<b>fclamp\t%0, %2.<Vetype>, %3.<Vetype>"
1424 )
1425
1426 ;; =========================================================================
1427 ;; == Uniform ternary arithmnetic
1428 ;; =========================================================================
1429
1430 ;; -------------------------------------------------------------------------
1431 ;; ---- [INT] General ternary arithmetic that maps to unspecs
1432 ;; -------------------------------------------------------------------------
1433 ;; Includes:
1434 ;; - ADCLB
1435 ;; - ADCLT
1436 ;; - EORBT
1437 ;; - EORTB
1438 ;; - SBCLB
1439 ;; - SBCLT
1440 ;; - SQRDMLAH
1441 ;; - SQRDMLSH
1442 ;; -------------------------------------------------------------------------
1443
1444 (define_insn "@aarch64_sve_<sve_int_op><mode>"
1445   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1446         (unspec:SVE_FULL_I
1447           [(match_operand:SVE_FULL_I 2 "register_operand")
1448            (match_operand:SVE_FULL_I 3 "register_operand")
1449            (match_operand:SVE_FULL_I 1 "register_operand")]
1450           SVE2_INT_TERNARY))]
1451   "TARGET_SVE2"
1452   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1453      [ w        , 0 , w , w ; *              ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
1454      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
1455   }
1456 )
1457
1458 (define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
1459   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
1460         (unspec:SVE_FULL_HSDI
1461           [(match_operand:SVE_FULL_HSDI 2 "register_operand")
1462            (unspec:SVE_FULL_HSDI
1463              [(match_operand:SVE_FULL_HSDI 3 "register_operand")
1464               (match_operand:SI 4 "const_int_operand")]
1465              UNSPEC_SVE_LANE_SELECT)
1466            (match_operand:SVE_FULL_HSDI 1 "register_operand")]
1467           SVE2_INT_TERNARY_LANE))]
1468   "TARGET_SVE2"
1469   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
1470      [ w        , 0 , w , <sve_lane_con> ; *              ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
1471      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
1472   }
1473 )
1474
1475 ;; -------------------------------------------------------------------------
1476 ;; ---- [INT] Multiply-and-accumulate operations
1477 ;; -------------------------------------------------------------------------
1478 ;; Includes the lane forms of:
1479 ;; - MLA
1480 ;; - MLS
1481 ;; -------------------------------------------------------------------------
1482
1483 (define_insn "@aarch64_sve_add_mul_lane_<mode>"
1484   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
1485         (plus:SVE_FULL_HSDI
1486           (mult:SVE_FULL_HSDI
1487             (unspec:SVE_FULL_HSDI
1488               [(match_operand:SVE_FULL_HSDI 3 "register_operand")
1489                (match_operand:SI 4 "const_int_operand")]
1490               UNSPEC_SVE_LANE_SELECT)
1491             (match_operand:SVE_FULL_HSDI 2 "register_operand"))
1492           (match_operand:SVE_FULL_HSDI 1 "register_operand")))]
1493   "TARGET_SVE2"
1494   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
1495      [ w        , 0 , w , <sve_lane_con> ; *              ] mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
1496      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
1497   }
1498 )
1499
1500 (define_insn "@aarch64_sve_sub_mul_lane_<mode>"
1501   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
1502         (minus:SVE_FULL_HSDI
1503           (match_operand:SVE_FULL_HSDI 1 "register_operand")
1504           (mult:SVE_FULL_HSDI
1505             (unspec:SVE_FULL_HSDI
1506               [(match_operand:SVE_FULL_HSDI 3 "register_operand")
1507                (match_operand:SI 4 "const_int_operand")]
1508               UNSPEC_SVE_LANE_SELECT)
1509             (match_operand:SVE_FULL_HSDI 2 "register_operand"))))]
1510   "TARGET_SVE2"
1511   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
1512      [ w        , 0 , w , <sve_lane_con> ; *              ] mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
1513      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
1514   }
1515 )
1516
1517 ;; -------------------------------------------------------------------------
1518 ;; ---- [INT] Binary logic operations with rotation
1519 ;; -------------------------------------------------------------------------
1520 ;; Includes:
1521 ;; - XAR
1522 ;; -------------------------------------------------------------------------
1523
1524 ;; Also allow the Advanced SIMD modes as the the SVE2 XAR instruction
1525 ;; can handle more element sizes than the TARGET_SHA3 one from Advanced SIMD.
1526 ;; Don't allow the V2DImode use here unless !TARGET_SHA3 as the Advanced SIMD
1527 ;; version should be preferred when available as it is non-destructive on its
1528 ;; input.
1529 (define_insn "@aarch64_sve2_xar<mode>"
1530   [(set (match_operand:SVE_ASIMD_FULL_I 0 "register_operand" "=w,?&w")
1531         (rotate:SVE_ASIMD_FULL_I
1532           (xor:SVE_ASIMD_FULL_I
1533             (match_operand:SVE_ASIMD_FULL_I 1 "register_operand" "%0,w")
1534             (match_operand:SVE_ASIMD_FULL_I 2 "register_operand" "w,w"))
1535           (match_operand:SVE_ASIMD_FULL_I 3 "aarch64_simd_lshift_imm")))]
1536   "TARGET_SVE2 && !(<MODE>mode == V2DImode && TARGET_SHA3)"
1537   {
1538     operands[3]
1539       = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)
1540                  - INTVAL (unwrap_const_vec_duplicate (operands[3])));
1541     if (which_alternative == 0)
1542       return "xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
1543     return "movprfx\t%Z0, %Z1\;xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
1544   }
1545   [(set_attr "movprfx" "*,yes")]
1546 )
1547
1548 ;; -------------------------------------------------------------------------
1549 ;; ---- [INT] Ternary logic operations
1550 ;; -------------------------------------------------------------------------
1551 ;; Includes:
1552 ;; - BCAX
1553 ;; - BSL
1554 ;; - BSL1N
1555 ;; - BSL2N
1556 ;; - EOR3
1557 ;; - NBSL
1558 ;; -------------------------------------------------------------------------
1559
1560 ;; Unpredicated exclusive OR of AND.
1561 (define_expand "@aarch64_sve2_bcax<mode>"
1562   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1563         (xor:SVE_FULL_I
1564           (and:SVE_FULL_I
1565             (unspec:SVE_FULL_I
1566               [(match_dup 4)
1567                (not:SVE_FULL_I
1568                  (match_operand:SVE_FULL_I 3 "register_operand"))]
1569               UNSPEC_PRED_X)
1570             (match_operand:SVE_FULL_I 2 "register_operand"))
1571           (match_operand:SVE_FULL_I 1 "register_operand")))]
1572   "TARGET_SVE2"
1573   {
1574     operands[4] = CONSTM1_RTX (<VPRED>mode);
1575   }
1576 )
1577
1578 (define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
1579   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1580         (xor:SVE_FULL_I
1581           (and:SVE_FULL_I
1582             (unspec:SVE_FULL_I
1583               [(match_operand 4)
1584                (not:SVE_FULL_I
1585                  (match_operand:SVE_FULL_I 3 "register_operand"))]
1586               UNSPEC_PRED_X)
1587             (match_operand:SVE_FULL_I 2 "register_operand"))
1588           (match_operand:SVE_FULL_I 1 "register_operand")))]
1589   "TARGET_SVE2"
1590   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1591      [ w        , 0 , w , w ; *              ] bcax\t%0.d, %0.d, %2.d, %3.d
1592      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d
1593   }
1594   "&& !CONSTANT_P (operands[4])"
1595   {
1596     operands[4] = CONSTM1_RTX (<VPRED>mode);
1597   }
1598 )
1599
1600 ;; Unpredicated 3-way exclusive OR.
1601 (define_insn "@aarch64_sve2_eor3<mode>"
1602   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1603         (xor:SVE_FULL_I
1604           (xor:SVE_FULL_I
1605             (match_operand:SVE_FULL_I 1 "register_operand")
1606             (match_operand:SVE_FULL_I 2 "register_operand"))
1607           (match_operand:SVE_FULL_I 3 "register_operand")))]
1608   "TARGET_SVE2"
1609   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1610      [ w        , 0 , w , w ; *              ] eor3\t%0.d, %0.d, %2.d, %3.d
1611      [ w        , w , 0 , w ; *              ] eor3\t%0.d, %0.d, %1.d, %3.d
1612      [ w        , w , w , 0 ; *              ] eor3\t%0.d, %0.d, %1.d, %2.d
1613      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d
1614   }
1615 )
1616
1617 ;; Use NBSL for vector NOR.
1618 (define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
1619   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1620         (unspec:SVE_FULL_I
1621           [(match_operand 3)
1622            (and:SVE_FULL_I
1623              (not:SVE_FULL_I
1624                (match_operand:SVE_FULL_I 1 "register_operand"))
1625              (not:SVE_FULL_I
1626                (match_operand:SVE_FULL_I 2 "register_operand")))]
1627           UNSPEC_PRED_X))]
1628   "TARGET_SVE2"
1629   {@ [ cons: =0 , 1  , 2 ; attrs: movprfx ]
1630      [ w        , %0 , w ; *              ] nbsl\t%0.d, %0.d, %2.d, %0.d
1631      [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d
1632   }
1633   "&& !CONSTANT_P (operands[3])"
1634   {
1635     operands[3] = CONSTM1_RTX (<VPRED>mode);
1636   }
1637 )
1638
1639 ;; Use NBSL for vector NAND.
1640 (define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
1641   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1642         (unspec:SVE_FULL_I
1643           [(match_operand 3)
1644            (ior:SVE_FULL_I
1645              (not:SVE_FULL_I
1646                (match_operand:SVE_FULL_I 1 "register_operand"))
1647              (not:SVE_FULL_I
1648                (match_operand:SVE_FULL_I 2 "register_operand")))]
1649           UNSPEC_PRED_X))]
1650   "TARGET_SVE2"
1651   {@ [ cons: =0 , 1  , 2 ; attrs: movprfx ]
1652      [ w        , %0 , w ; *              ] nbsl\t%0.d, %0.d, %2.d, %2.d
1653      [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d
1654   }
1655   "&& !CONSTANT_P (operands[3])"
1656   {
1657     operands[3] = CONSTM1_RTX (<VPRED>mode);
1658   }
1659 )
1660
1661 ;; Unpredicated bitwise select.
1662 ;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
1663 (define_expand "@aarch64_sve2_bsl<mode>"
1664   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1665         (xor:SVE_FULL_I
1666           (and:SVE_FULL_I
1667             (xor:SVE_FULL_I
1668               (match_operand:SVE_FULL_I 1 "register_operand")
1669               (match_operand:SVE_FULL_I 2 "register_operand"))
1670             (match_operand:SVE_FULL_I 3 "register_operand"))
1671           (match_dup 2)))]
1672   "TARGET_SVE2"
1673 )
1674
1675 (define_insn "*aarch64_sve2_bsl<mode>"
1676   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1677         (xor:SVE_FULL_I
1678           (and:SVE_FULL_I
1679             (xor:SVE_FULL_I
1680               (match_operand:SVE_FULL_I 1 "register_operand")
1681               (match_operand:SVE_FULL_I 2 "register_operand"))
1682             (match_operand:SVE_FULL_I 3 "register_operand"))
1683           (match_dup BSL_DUP)))]
1684   "TARGET_SVE2"
1685   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1686      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
1687      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
1688   }
1689 )
1690
1691 ;; Unpredicated bitwise inverted select.
1692 ;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
1693 (define_expand "@aarch64_sve2_nbsl<mode>"
1694   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1695         (unspec:SVE_FULL_I
1696           [(match_dup 4)
1697            (not:SVE_FULL_I
1698              (xor:SVE_FULL_I
1699                (and:SVE_FULL_I
1700                  (xor:SVE_FULL_I
1701                    (match_operand:SVE_FULL_I 1 "register_operand")
1702                    (match_operand:SVE_FULL_I 2 "register_operand"))
1703                  (match_operand:SVE_FULL_I 3 "register_operand"))
1704                (match_dup 2)))]
1705           UNSPEC_PRED_X))]
1706   "TARGET_SVE2"
1707   {
1708     operands[4] = CONSTM1_RTX (<VPRED>mode);
1709   }
1710 )
1711
1712 (define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
1713   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1714         (unspec:SVE_FULL_I
1715           [(match_operand 4)
1716            (not:SVE_FULL_I
1717              (xor:SVE_FULL_I
1718                (and:SVE_FULL_I
1719                  (xor:SVE_FULL_I
1720                    (match_operand:SVE_FULL_I 1 "register_operand")
1721                    (match_operand:SVE_FULL_I 2 "register_operand"))
1722                  (match_operand:SVE_FULL_I 3 "register_operand"))
1723                (match_dup BSL_DUP)))]
1724           UNSPEC_PRED_X))]
1725   "TARGET_SVE2"
1726   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1727      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
1728      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
1729   }
1730   "&& !CONSTANT_P (operands[4])"
1731   {
1732     operands[4] = CONSTM1_RTX (<VPRED>mode);
1733   }
1734 )
1735
1736 (define_insn "*aarch64_sve2_nbsl_unpred<mode>"
1737   [(set (match_operand:VDQ_I 0 "register_operand")
1738         (not:VDQ_I
1739           (xor:VDQ_I
1740             (and:VDQ_I
1741               (xor:VDQ_I
1742                 (match_operand:VDQ_I 1 "register_operand")
1743                 (match_operand:VDQ_I 2 "register_operand"))
1744               (match_operand:VDQ_I 3 "register_operand"))
1745             (match_dup BSL_DUP))))]
1746   "TARGET_SVE2"
1747   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1748      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1749      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%Z0, %Z<bsl_mov>\;nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1750   }
1751 )
1752
1753 ;; Unpredicated bitwise select with inverted first operand.
1754 ;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
1755 (define_expand "@aarch64_sve2_bsl1n<mode>"
1756   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1757         (xor:SVE_FULL_I
1758           (and:SVE_FULL_I
1759             (unspec:SVE_FULL_I
1760               [(match_dup 4)
1761                (not:SVE_FULL_I
1762                  (xor:SVE_FULL_I
1763                    (match_operand:SVE_FULL_I 1 "register_operand")
1764                    (match_operand:SVE_FULL_I 2 "register_operand")))]
1765               UNSPEC_PRED_X)
1766             (match_operand:SVE_FULL_I 3 "register_operand"))
1767           (match_dup 2)))]
1768   "TARGET_SVE2"
1769   {
1770     operands[4] = CONSTM1_RTX (<VPRED>mode);
1771   }
1772 )
1773
1774 (define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
1775   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1776         (xor:SVE_FULL_I
1777           (and:SVE_FULL_I
1778             (unspec:SVE_FULL_I
1779               [(match_operand 4)
1780                (not:SVE_FULL_I
1781                  (xor:SVE_FULL_I
1782                    (match_operand:SVE_FULL_I 1 "register_operand")
1783                    (match_operand:SVE_FULL_I 2 "register_operand")))]
1784               UNSPEC_PRED_X)
1785             (match_operand:SVE_FULL_I 3 "register_operand"))
1786           (match_dup BSL_DUP)))]
1787   "TARGET_SVE2"
1788   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1789      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
1790      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
1791   }
1792   "&& !CONSTANT_P (operands[4])"
1793   {
1794     operands[4] = CONSTM1_RTX (<VPRED>mode);
1795   }
1796 )
1797
1798 (define_insn "*aarch64_sve2_bsl1n_unpred<mode>"
1799   [(set (match_operand:VDQ_I 0 "register_operand")
1800         (xor:VDQ_I
1801           (and:VDQ_I
1802             (not:VDQ_I
1803               (xor:VDQ_I
1804                 (match_operand:VDQ_I 1 "register_operand")
1805                 (match_operand:VDQ_I 2 "register_operand")))
1806             (match_operand:VDQ_I 3 "register_operand"))
1807           (match_dup BSL_DUP)))]
1808   "TARGET_SVE2"
1809   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1810      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1811      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%Z0, %Z<bsl_mov>\;bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1812   }
1813 )
1814
1815 ;; Unpredicated bitwise select with inverted second operand.
1816 ;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
1817 (define_expand "@aarch64_sve2_bsl2n<mode>"
1818   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1819         (ior:SVE_FULL_I
1820           (and:SVE_FULL_I
1821             (match_operand:SVE_FULL_I 1 "register_operand")
1822             (match_operand:SVE_FULL_I 3 "register_operand"))
1823           (unspec:SVE_FULL_I
1824             [(match_dup 4)
1825              (and:SVE_FULL_I
1826                (not:SVE_FULL_I
1827                  (match_operand:SVE_FULL_I 2 "register_operand"))
1828                (not:SVE_FULL_I
1829                  (match_dup 3)))]
1830             UNSPEC_PRED_X)))]
1831   "TARGET_SVE2"
1832   {
1833     operands[4] = CONSTM1_RTX (<VPRED>mode);
1834   }
1835 )
1836
1837 (define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
1838   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1839         (ior:SVE_FULL_I
1840           (and:SVE_FULL_I
1841             (match_operand:SVE_FULL_I 1 "register_operand")
1842             (match_operand:SVE_FULL_I 2 "register_operand"))
1843           (unspec:SVE_FULL_I
1844             [(match_operand 4)
1845              (and:SVE_FULL_I
1846                (not:SVE_FULL_I
1847                  (match_operand:SVE_FULL_I 3 "register_operand"))
1848                (not:SVE_FULL_I
1849                  (match_dup BSL_DUP)))]
1850             UNSPEC_PRED_X)))]
1851   "TARGET_SVE2"
1852   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1853      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
1854      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
1855   }
1856   "&& !CONSTANT_P (operands[4])"
1857   {
1858     operands[4] = CONSTM1_RTX (<VPRED>mode);
1859   }
1860 )
1861
1862 ;; Unpredicated bitwise select with inverted second operand, alternative form.
1863 ;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
1864 (define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
1865   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1866         (ior:SVE_FULL_I
1867           (and:SVE_FULL_I
1868             (match_operand:SVE_FULL_I 1 "register_operand")
1869             (match_operand:SVE_FULL_I 2 "register_operand"))
1870           (unspec:SVE_FULL_I
1871             [(match_operand 4)
1872              (and:SVE_FULL_I
1873                (not:SVE_FULL_I
1874                  (match_dup BSL_DUP))
1875                (not:SVE_FULL_I
1876                  (match_operand:SVE_FULL_I 3 "register_operand")))]
1877             UNSPEC_PRED_X)))]
1878   "TARGET_SVE2"
1879   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1880      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
1881      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
1882   }
1883   "&& !CONSTANT_P (operands[4])"
1884   {
1885     operands[4] = CONSTM1_RTX (<VPRED>mode);
1886   }
1887 )
1888
1889 (define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
1890   [(set (match_operand:VDQ_I 0 "register_operand")
1891         (ior:VDQ_I
1892           (and:VDQ_I
1893             (match_operand:VDQ_I 1 "register_operand")
1894             (match_operand:VDQ_I 2 "register_operand"))
1895           (and:VDQ_I
1896             (not:VDQ_I (match_operand:VDQ_I 3 "register_operand"))
1897             (not:VDQ_I (match_dup BSL_DUP)))))]
1898   "TARGET_SVE2"
1899   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1900      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1901      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1902   }
1903 )
1904
1905 (define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
1906   [(set (match_operand:VDQ_I 0 "register_operand")
1907         (ior:VDQ_I
1908           (and:VDQ_I
1909             (match_operand:VDQ_I 1 "register_operand")
1910             (match_operand:VDQ_I 2 "register_operand"))
1911           (and:VDQ_I
1912             (not:VDQ_I (match_dup BSL_DUP))
1913             (not:VDQ_I (match_operand:VDQ_I 3 "register_operand")))))]
1914   "TARGET_SVE2"
1915   {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
1916      [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1917      [ ?&w      , w         , w         , w ; yes            ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1918   }
1919 )
1920
1921 ;; -------------------------------------------------------------------------
1922 ;; ---- [INT] Shift-and-accumulate operations
1923 ;; -------------------------------------------------------------------------
1924 ;; Includes:
1925 ;; - SRSRA
1926 ;; - SSRA
1927 ;; - URSRA
1928 ;; - USRA
1929 ;; -------------------------------------------------------------------------
1930
1931 ;; Provide the natural unpredicated interface for SSRA and USRA.
1932 (define_expand "@aarch64_sve_add_<sve_int_op><mode>"
1933   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1934         (plus:SVE_FULL_I
1935           (unspec:SVE_FULL_I
1936             [(match_dup 4)
1937              (SHIFTRT:SVE_FULL_I
1938                (match_operand:SVE_FULL_I 2 "register_operand")
1939                (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
1940             UNSPEC_PRED_X)
1941          (match_operand:SVE_FULL_I 1 "register_operand")))]
1942   "TARGET_SVE2"
1943   {
1944     operands[4] = CONSTM1_RTX (<VPRED>mode);
1945   }
1946 )
1947
1948 ;; Pattern-match SSRA and USRA as a predicated operation whose predicate
1949 ;; isn't needed.
1950 (define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
1951   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1952         (plus:SVE_FULL_I
1953           (unspec:SVE_FULL_I
1954             [(match_operand 4)
1955              (SHIFTRT:SVE_FULL_I
1956                (match_operand:SVE_FULL_I 2 "register_operand")
1957                (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
1958             UNSPEC_PRED_X)
1959          (match_operand:SVE_FULL_I 1 "register_operand")))]
1960   "TARGET_SVE2"
1961   {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
1962      [ w        , 0 , w ; *              ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
1963      [ ?&w      , w , w ; yes            ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
1964   }
1965   "&& !CONSTANT_P (operands[4])"
1966   {
1967     operands[4] = CONSTM1_RTX (<VPRED>mode);
1968   }
1969 )
1970
1971 ;; SRSRA and URSRA.
1972 (define_insn "@aarch64_sve_add_<sve_int_op><mode>"
1973   [(set (match_operand:SVE_FULL_I 0 "register_operand")
1974         (plus:SVE_FULL_I
1975           (unspec:SVE_FULL_I
1976             [(match_operand:SVE_FULL_I 2 "register_operand")
1977              (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
1978             VRSHR_N)
1979          (match_operand:SVE_FULL_I 1 "register_operand")))]
1980   "TARGET_SVE2"
1981   {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
1982      [ w        , 0 , w ; *              ] <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
1983      [ ?&w      , w , w ; yes            ] movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
1984   }
1985 )
1986
1987 ;; -------------------------------------------------------------------------
1988 ;; ---- [INT] Shift-and-insert operations
1989 ;; -------------------------------------------------------------------------
1990 ;; Includes:
1991 ;; - SLI
1992 ;; - SRI
1993 ;; -------------------------------------------------------------------------
1994
1995 ;; These instructions do not take MOVPRFX.
1996 (define_insn "@aarch64_sve_<sve_int_op><mode>"
1997   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
1998         (unspec:SVE_FULL_I
1999           [(match_operand:SVE_FULL_I 1 "register_operand" "0")
2000            (match_operand:SVE_FULL_I 2 "register_operand" "w")
2001            (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
2002           SVE2_INT_SHIFT_INSERT))]
2003   "TARGET_SVE2"
2004   "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
2005 )
2006
2007 ;; -------------------------------------------------------------------------
2008 ;; ---- [INT] Sum of absolute differences
2009 ;; -------------------------------------------------------------------------
2010 ;; Includes:
2011 ;; - SABA
2012 ;; - UABA
2013 ;; -------------------------------------------------------------------------
2014
2015 ;; Provide the natural unpredicated interface for SABA and UABA.
2016 (define_expand "@aarch64_sve2_<su>aba<mode>"
2017   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
2018         (plus:SVE_FULL_I
2019           (minus:SVE_FULL_I
2020             (unspec:SVE_FULL_I
2021               [(match_dup 4)
2022                (USMAX:SVE_FULL_I
2023                  (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
2024                  (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
2025               UNSPEC_PRED_X)
2026             (unspec:SVE_FULL_I
2027               [(match_dup 4)
2028                (<max_opp>:SVE_FULL_I
2029                  (match_dup 2)
2030                  (match_dup 3))]
2031               UNSPEC_PRED_X))
2032           (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
2033   "TARGET_SVE2"
2034   {
2035     operands[4] = CONSTM1_RTX (<VPRED>mode);
2036   }
2037 )
2038
2039 ;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
2040 ;; operation whose predicates aren't needed.
2041 (define_insn "*aarch64_sve2_<su>aba<mode>"
2042   [(set (match_operand:SVE_FULL_I 0 "register_operand")
2043         (plus:SVE_FULL_I
2044           (minus:SVE_FULL_I
2045             (unspec:SVE_FULL_I
2046               [(match_operand 4)
2047                (USMAX:SVE_FULL_I
2048                  (match_operand:SVE_FULL_I 2 "register_operand")
2049                  (match_operand:SVE_FULL_I 3 "register_operand"))]
2050               UNSPEC_PRED_X)
2051             (unspec:SVE_FULL_I
2052               [(match_operand 5)
2053                (<max_opp>:SVE_FULL_I
2054                  (match_dup 2)
2055                  (match_dup 3))]
2056               UNSPEC_PRED_X))
2057           (match_operand:SVE_FULL_I 1 "register_operand")))]
2058   "TARGET_SVE2"
2059   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2060      [ w        , 0 , w , w ; *              ] <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
2061      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
2062   }
2063 )
2064
2065 ;; -------------------------------------------------------------------------
2066 ;; ---- [FP] Mfloat8 Multiply-and-accumulate operations
2067 ;; -------------------------------------------------------------------------
2068 ;; Includes:
2069 ;; - FMLALB (vectors, FP8 to FP16) (FP8FMA)
2070 ;; - FMLALT (vectors, FP8 to FP16) (FP8FMA)
2071 ;; - FMLALB (indexed, FP8 to FP16) (FP8FMA)
2072 ;; - FMLALT (indexed, FP8 to FP16) (FP8FMA)
2073 ;; - FMLALLBB (vectors) (FP8FMA)
2074 ;; - FMLALLBB (indexed) (FP8FMA)
2075 ;; - FMLALLBT (vectors) (FP8FMA)
2076 ;; - FMLALLBT (indexed) (FP8FMA)
2077 ;; - FMLALLTB (vectors) (FP8FMA)
2078 ;; - FMLALLTB (indexed) (FP8FMA)
2079 ;; - FMLALLTT (vectors) (FP8FMA)
2080 ;; - FMLALLTT (indexed) (FP8FMA)
2081 ;; -------------------------------------------------------------------------
2082
2083 (define_insn "@aarch64_sve_add_<insn><mode>"
2084   [(set (match_operand:VNx8HF_ONLY 0 "register_operand")
2085         (unspec:VNx8HF_ONLY
2086           [(match_operand:VNx8HF 1 "register_operand")
2087            (match_operand:VNx16QI 2 "register_operand")
2088            (match_operand:VNx16QI 3 "register_operand")
2089            (reg:DI FPM_REGNUM)]
2090           FMLAL_FP8_HF))]
2091   "TARGET_SSVE_FP8FMA"
2092   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2093      [ w        , 0 , w , w ; *              ] <insn>\t%0.h, %2.b, %3.b
2094      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b
2095   }
2096 )
2097
2098 (define_insn "@aarch64_sve_add_<insn><mode>"
2099   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
2100         (unspec:VNx4SF_ONLY
2101           [(match_operand:VNx4SF 1 "register_operand")
2102            (match_operand:VNx16QI 2 "register_operand")
2103            (match_operand:VNx16QI 3 "register_operand")
2104            (reg:DI FPM_REGNUM)]
2105           FMLALL_FP8_SF))]
2106   "TARGET_SSVE_FP8FMA"
2107   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2108      [ w        , 0 , w , w ; *              ] <insn>\t%0.s, %2.b, %3.b
2109      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b
2110   }
2111 )
2112
2113 (define_insn "@aarch64_sve_add_lane_<insn><mode>"
2114   [(set (match_operand:VNx8HF_ONLY 0 "register_operand")
2115         (unspec:VNx8HF_ONLY
2116           [(match_operand:VNx8HF 1 "register_operand")
2117            (match_operand:VNx16QI 2 "register_operand")
2118            (match_operand:VNx16QI 3 "register_operand")
2119            (match_operand:SI 4 "const_int_operand")
2120            (reg:DI FPM_REGNUM)]
2121           FMLAL_FP8_HF))]
2122   "TARGET_SSVE_FP8FMA"
2123   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2124      [ w        , 0 , w , y ; *              ] <insn>\t%0.h, %2.b, %3.b[%4]
2125      [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b[%4]
2126   }
2127 )
2128
2129 (define_insn "@aarch64_sve_add_lane_<insn><mode>"
2130   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
2131         (unspec:VNx4SF_ONLY
2132           [(match_operand:VNx4SF 1 "register_operand")
2133            (match_operand:VNx16QI 2 "register_operand")
2134            (match_operand:VNx16QI 3 "register_operand")
2135            (match_operand:SI 4 "const_int_operand")
2136            (reg:DI FPM_REGNUM)]
2137           FMLALL_FP8_SF))]
2138   "TARGET_SSVE_FP8FMA"
2139   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2140      [ w        , 0 , w , y ; *              ] <insn>\t%0.s, %2.b, %3.b[%4]
2141      [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b[%4]
2142   }
2143 )
2144
2145 ;; -------------------------------------------------------------------------
2146 ;; ---- [FP] Mfloat8 dot products
2147 ;; -------------------------------------------------------------------------
2148 ;; Includes:
2149 ;; - FDOT (4-way, vectors) (FP8DOT4)
2150 ;; - FDOT (4-way, indexed) (FP8DOT4)
2151 ;; - FDOT (2-way, vectors) (FP8DOT2)
2152 ;; - FDOT (2-way, indexed) (FP8DOT2)
2153 ;; -------------------------------------------------------------------------
2154 (define_insn "@aarch64_sve_dot<mode>"
2155   [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
2156         (unspec:SVE_FULL_HSF
2157           [(match_operand:SVE_FULL_HSF 1 "register_operand")
2158            (match_operand:VNx16QI 2 "register_operand")
2159            (match_operand:VNx16QI 3 "register_operand")
2160            (reg:DI FPM_REGNUM)]
2161           UNSPEC_DOT_FP8))]
2162   "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
2163   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2164      [ w        , 0 , w , w ; *              ] fdot\t%0.<Vetype>, %2.b, %3.b
2165      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b
2166   }
2167 )
2168
2169 (define_insn "@aarch64_sve_dot_lane<mode>"
2170   [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
2171         (unspec:SVE_FULL_HSF
2172           [(match_operand:SVE_FULL_HSF 1 "register_operand")
2173            (match_operand:VNx16QI 2 "register_operand")
2174            (match_operand:VNx16QI 3 "register_operand")
2175            (match_operand:SI 4 "const_int_operand")
2176            (reg:DI FPM_REGNUM)]
2177           UNSPEC_DOT_LANE_FP8))]
2178   "TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
2179   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2180      [ w        , 0 , w , y ; *              ] fdot\t%0.<Vetype>, %2.b, %3.b[%4]
2181      [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b[%4]
2182   }
2183 )
2184
2185 ;; =========================================================================
2186 ;; == Extending arithmetic
2187 ;; =========================================================================
2188
2189 ;; -------------------------------------------------------------------------
2190 ;; ---- [INT] Multi-register widening conversions
2191 ;; -------------------------------------------------------------------------
2192 ;; Includes:
2193 ;; - SUNPK
2194 ;; - UUNPK
2195 ;; -------------------------------------------------------------------------
2196
2197 (define_insn "<optab><mode><v2xwide>2"
2198   [(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw2")
2199         (ANY_EXTEND:<V2XWIDE>
2200           (match_operand:SVE_FULL_BHSI 1 "register_operand" "w")))]
2201   "TARGET_STREAMING_SME2"
2202   "<su>unpk\t%0, %1.<Vetype>"
2203 )
2204
2205 (define_insn "<optab><mode><v2xwide>2"
2206   [(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw4")
2207         (ANY_EXTEND:<V2XWIDE>
2208           (match_operand:SVE_FULL_BHSIx2 1 "aligned_register_operand" "Uw2")))]
2209   "TARGET_STREAMING_SME2"
2210   "<su>unpk\t%0, %1"
2211 )
2212
2213 ;; -------------------------------------------------------------------------
2214 ;; ---- [INT] Wide binary arithmetic
2215 ;; -------------------------------------------------------------------------
2216 ;; Includes:
2217 ;; - SADDWB
2218 ;; - SADDWT
2219 ;; - SSUBWB
2220 ;; - SSUBWT
2221 ;; - UADDWB
2222 ;; - UADDWT
2223 ;; - USUBWB
2224 ;; - USUBWT
2225 ;; -------------------------------------------------------------------------
2226
2227 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2228   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2229         (unspec:SVE_FULL_HSDI
2230           [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
2231            (match_operand:<VNARROW> 2 "register_operand" "w")]
2232           SVE2_INT_BINARY_WIDE))]
2233   "TARGET_SVE2"
2234   "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
2235 )
2236
2237 ;; -------------------------------------------------------------------------
2238 ;; ---- [INT] Long binary arithmetic
2239 ;; -------------------------------------------------------------------------
2240 ;; Includes:
2241 ;; - SABDLB
2242 ;; - SABDLT
2243 ;; - SADDLB
2244 ;; - SADDLBT
2245 ;; - SADDLT
2246 ;; - SMULLB
2247 ;; - SMULLT
2248 ;; - SQDMULLB
2249 ;; - SQDMULLT
2250 ;; - SSUBLB
2251 ;; - SSUBLBT
2252 ;; - SSUBLT
2253 ;; - SSUBLTB
2254 ;; - UABDLB
2255 ;; - UABDLT
2256 ;; - UADDLB
2257 ;; - UADDLT
2258 ;; - UMULLB
2259 ;; - UMULLT
2260 ;; - USUBLB
2261 ;; - USUBLT
2262 ;; -------------------------------------------------------------------------
2263
2264 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2265   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2266         (unspec:SVE_FULL_HSDI
2267           [(match_operand:<VNARROW> 1 "register_operand" "w")
2268            (match_operand:<VNARROW> 2 "register_operand" "w")]
2269           SVE2_INT_BINARY_LONG))]
2270   "TARGET_SVE2"
2271   "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
2272 )
2273
2274 (define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
2275   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
2276         (unspec:SVE_FULL_SDI
2277           [(match_operand:<VNARROW> 1 "register_operand" "w")
2278            (unspec:<VNARROW>
2279              [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
2280               (match_operand:SI 3 "const_int_operand")]
2281              UNSPEC_SVE_LANE_SELECT)]
2282           SVE2_INT_BINARY_LONG_LANE))]
2283   "TARGET_SVE2"
2284   "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
2285 )
2286
2287 ;; -------------------------------------------------------------------------
2288 ;; ---- [INT] Long left shifts
2289 ;; -------------------------------------------------------------------------
2290 ;; Includes:
2291 ;; - SSHLLB
2292 ;; - SSHLLT
2293 ;; - USHLLB
2294 ;; - USHLLT
2295 ;; -------------------------------------------------------------------------
2296
2297 ;; The immediate range is enforced before generating the instruction.
2298 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2299   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2300         (unspec:SVE_FULL_HSDI
2301           [(match_operand:<VNARROW> 1 "register_operand" "w")
2302            (match_operand:DI 2 "const_int_operand")]
2303           SVE2_INT_SHIFT_IMM_LONG))]
2304   "TARGET_SVE2"
2305   "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
2306 )
2307
2308 ;; -------------------------------------------------------------------------
2309 ;; ---- [INT] Long binary arithmetic with accumulation
2310 ;; -------------------------------------------------------------------------
2311 ;; Includes:
2312 ;; - SABALB
2313 ;; - SABALT
2314 ;; - SDOT (SME2 or SVE2p1)
2315 ;; - SMLALB
2316 ;; - SMLALT
2317 ;; - SMLSLB
2318 ;; - SMLSLT
2319 ;; - SQDMLALB
2320 ;; - SQDMLALBT
2321 ;; - SQDMLALT
2322 ;; - SQDMLSLB
2323 ;; - SQDMLSLBT
2324 ;; - SQDMLSLT
2325 ;; - UABALB
2326 ;; - UABALT
2327 ;; - UDOT (SME2 or SVE2p1)
2328 ;; - UMLALB
2329 ;; - UMLALT
2330 ;; - UMLSLB
2331 ;; - UMLSLT
2332 ;; -------------------------------------------------------------------------
2333
2334 ;; Non-saturating MLA operations.
2335 (define_insn "@aarch64_sve_add_<sve_int_op><mode>"
2336   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
2337         (plus:SVE_FULL_HSDI
2338           (unspec:SVE_FULL_HSDI
2339             [(match_operand:<VNARROW> 2 "register_operand")
2340              (match_operand:<VNARROW> 3 "register_operand")]
2341             SVE2_INT_ADD_BINARY_LONG)
2342           (match_operand:SVE_FULL_HSDI 1 "register_operand")))]
2343   "TARGET_SVE2"
2344   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2345      [ w        , 0 , w , w ; *              ] <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
2346      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
2347   }
2348 )
2349
2350 ;; Non-saturating MLA operations with lane select.
2351 (define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
2352   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
2353         (plus:SVE_FULL_SDI
2354           (unspec:SVE_FULL_SDI
2355             [(match_operand:<VNARROW> 2 "register_operand")
2356              (unspec:<VNARROW>
2357                [(match_operand:<VNARROW> 3 "register_operand")
2358                 (match_operand:SI 4 "const_int_operand")]
2359                UNSPEC_SVE_LANE_SELECT)]
2360             SVE2_INT_ADD_BINARY_LONG_LANE)
2361           (match_operand:SVE_FULL_SDI 1 "register_operand")))]
2362   "TARGET_SVE2"
2363   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
2364      [ w        , 0 , w , <sve_lane_con> ; *              ] <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
2365      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
2366   }
2367 )
2368
2369 ;; Saturating MLA operations.
2370 (define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
2371   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
2372         (ss_plus:SVE_FULL_HSDI
2373           (unspec:SVE_FULL_HSDI
2374             [(match_operand:<VNARROW> 2 "register_operand")
2375              (match_operand:<VNARROW> 3 "register_operand")]
2376             SVE2_INT_QADD_BINARY_LONG)
2377           (match_operand:SVE_FULL_HSDI 1 "register_operand")))]
2378   "TARGET_SVE2"
2379   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2380      [ w        , 0 , w , w ; *              ] <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
2381      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
2382   }
2383 )
2384
2385 ;; Saturating MLA operations with lane select.
2386 (define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
2387   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
2388         (ss_plus:SVE_FULL_SDI
2389           (unspec:SVE_FULL_SDI
2390             [(match_operand:<VNARROW> 2 "register_operand")
2391              (unspec:<VNARROW>
2392                [(match_operand:<VNARROW> 3 "register_operand")
2393                 (match_operand:SI 4 "const_int_operand")]
2394                UNSPEC_SVE_LANE_SELECT)]
2395             SVE2_INT_QADD_BINARY_LONG_LANE)
2396           (match_operand:SVE_FULL_SDI 1 "register_operand")))]
2397   "TARGET_SVE2"
2398   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
2399      [ w        , 0 , w , <sve_lane_con> ; *              ] <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
2400      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
2401   }
2402 )
2403
2404 ;; Non-saturating MLS operations.
2405 (define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
2406   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
2407         (minus:SVE_FULL_HSDI
2408           (match_operand:SVE_FULL_HSDI 1 "register_operand")
2409           (unspec:SVE_FULL_HSDI
2410             [(match_operand:<VNARROW> 2 "register_operand")
2411              (match_operand:<VNARROW> 3 "register_operand")]
2412             SVE2_INT_SUB_BINARY_LONG)))]
2413   "TARGET_SVE2"
2414   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2415      [ w        , 0 , w , w ; *              ] <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
2416      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
2417   }
2418 )
2419
2420 ;; Non-saturating MLS operations with lane select.
2421 (define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
2422   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
2423         (minus:SVE_FULL_SDI
2424           (match_operand:SVE_FULL_SDI 1 "register_operand")
2425           (unspec:SVE_FULL_SDI
2426             [(match_operand:<VNARROW> 2 "register_operand")
2427              (unspec:<VNARROW>
2428                [(match_operand:<VNARROW> 3 "register_operand")
2429                 (match_operand:SI 4 "const_int_operand")]
2430                UNSPEC_SVE_LANE_SELECT)]
2431             SVE2_INT_SUB_BINARY_LONG_LANE)))]
2432   "TARGET_SVE2"
2433   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
2434      [ w        , 0 , w , <sve_lane_con> ; *              ] <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
2435      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
2436   }
2437 )
2438
2439 ;; Saturating MLS operations.
2440 (define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
2441   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
2442         (ss_minus:SVE_FULL_HSDI
2443           (match_operand:SVE_FULL_HSDI 1 "register_operand")
2444           (unspec:SVE_FULL_HSDI
2445             [(match_operand:<VNARROW> 2 "register_operand")
2446              (match_operand:<VNARROW> 3 "register_operand")]
2447             SVE2_INT_QSUB_BINARY_LONG)))]
2448   "TARGET_SVE2"
2449   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2450      [ w        , 0 , w , w ; *              ] <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
2451      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
2452   }
2453 )
2454
2455 ;; Saturating MLS operations with lane select.
2456 (define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
2457   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
2458         (ss_minus:SVE_FULL_SDI
2459           (match_operand:SVE_FULL_SDI 1 "register_operand")
2460           (unspec:SVE_FULL_SDI
2461             [(match_operand:<VNARROW> 2 "register_operand")
2462              (unspec:<VNARROW>
2463                [(match_operand:<VNARROW> 3 "register_operand")
2464                 (match_operand:SI 4 "const_int_operand")]
2465                UNSPEC_SVE_LANE_SELECT)]
2466             SVE2_INT_QSUB_BINARY_LONG_LANE)))]
2467   "TARGET_SVE2"
2468   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
2469      [ w        , 0 , w , <sve_lane_con> ; *              ] <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
2470      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
2471   }
2472 )
2473
2474 ;; Two-way dot-product.
2475 (define_insn "<sur>dot_prodvnx4sivnx8hi"
2476   [(set (match_operand:VNx4SI 0 "register_operand")
2477         (plus:VNx4SI
2478           (unspec:VNx4SI
2479             [(match_operand:VNx8HI 1 "register_operand")
2480              (match_operand:VNx8HI 2 "register_operand")]
2481             DOTPROD)
2482           (match_operand:VNx4SI 3 "register_operand")))]
2483   "TARGET_SVE2p1_OR_SME2"
2484   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2485      [ w        , w , w , 0 ; *              ] <sur>dot\t%0.s, %1.h, %2.h
2486      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.h, %2.h
2487   }
2488 )
2489
2490 ;; -------------------------------------------------------------------------
2491 ;; ---- [FP] Multi-register operations
2492 ;; -------------------------------------------------------------------------
2493 ;; Includes the multi-register forms of:
2494 ;; - FMAX
2495 ;; - FMAXNM
2496 ;; - FMIN
2497 ;; - FMINNM
2498 ;; -------------------------------------------------------------------------
2499
2500 (define_expand "@aarch64_sve_<maxmin_uns_op><mode>"
2501   [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
2502         (unspec:SVE_Fx24
2503           [(match_operand:SVE_Fx24 1 "aligned_register_operand" "Uw<vector_count>")
2504            (match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")]
2505           SVE_FP_BINARY_MULTI))]
2506   "TARGET_STREAMING_SME2"
2507 )
2508
2509 (define_insn "*aarch64_sve_<maxmin_uns_op><mode>"
2510   [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
2511         (unspec:SVE_Fx24
2512           [(match_operand:SVE_Fx24 1 "aligned_register_operand" "%0")
2513            (match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")]
2514           SVE_FP_BINARY_MULTI))]
2515   "TARGET_STREAMING_SME2"
2516   "<b><maxmin_uns_op>\t%0, %0, %2"
2517 )
2518
2519 (define_insn "@aarch64_sve_single_<maxmin_uns_op><mode>"
2520   [(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
2521         (unspec:SVE_Fx24
2522           [(match_operand:SVE_Fx24 1 "aligned_register_operand" "0")
2523            (vec_duplicate:SVE_Fx24
2524              (match_operand:<VSINGLE> 2 "register_operand" "x"))]
2525           SVE_FP_BINARY_MULTI))]
2526   "TARGET_STREAMING_SME2"
2527   "<b><maxmin_uns_op>\t%0, %0, %2.<Vetype>"
2528 )
2529
2530 ;; -------------------------------------------------------------------------
2531 ;; ---- [FP] Long multiplication with accumulation
2532 ;; -------------------------------------------------------------------------
2533 ;; Includes:
2534 ;; - FDOT (SME2 or SVE2p1)
2535 ;; - FMLALB
2536 ;; - FMLALT
2537 ;; - FMLSLB
2538 ;; - FMLSLT
2539 ;; -------------------------------------------------------------------------
2540
2541 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
2542   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
2543         (unspec:VNx4SF_ONLY
2544           [(match_operand:<VNARROW> 1 "register_operand")
2545            (match_operand:<VNARROW> 2 "register_operand")
2546            (match_operand:VNx4SF_ONLY 3 "register_operand")]
2547           SVE2_FP_TERNARY_LONG))]
2548   "TARGET_SVE2"
2549   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2550      [ w        , w , w , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
2551      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
2552   }
2553 )
2554
2555 (define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
2556   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
2557         (unspec:VNx4SF_ONLY
2558           [(match_operand:<VNARROW> 1 "register_operand")
2559            (unspec:<VNARROW>
2560              [(match_operand:<VNARROW> 2 "register_operand")
2561               (match_operand:SI 3 "const_int_operand")]
2562              UNSPEC_SVE_LANE_SELECT)
2563            (match_operand:VNx4SF_ONLY 4 "register_operand")]
2564           SVE2_FP_TERNARY_LONG_LANE))]
2565   "TARGET_SVE2"
2566   {@ [ cons: =0 , 1 , 2              , 4 ; attrs: movprfx ]
2567      [ w        , w , <sve_lane_con> , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
2568      [ ?&w      , w , <sve_lane_con> , w ; yes            ] movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
2569   }
2570 )
2571
2572 ;; Two-way dot-product.
2573 (define_insn "aarch64_sve_fdotvnx4sfvnx8hf"
2574   [(set (match_operand:VNx4SF 0 "register_operand")
2575         (plus:VNx4SF
2576           (unspec:VNx4SF
2577             [(match_operand:VNx8HF 1 "register_operand")
2578              (match_operand:VNx8HF 2 "register_operand")]
2579             UNSPEC_FDOT)
2580           (match_operand:VNx4SF 3 "register_operand")))]
2581   "TARGET_SVE2p1_OR_SME2"
2582   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
2583      [ w        , w , w , 0 ; *              ] fdot\t%0.s, %1.h, %2.h
2584      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %3\;fdot\t%0.s, %1.h, %2.h
2585   }
2586 )
2587
2588 (define_insn "aarch64_fdot_prod_lanevnx4sfvnx8hf"
2589   [(set (match_operand:VNx4SF 0 "register_operand")
2590         (plus:VNx4SF
2591           (unspec:VNx4SF
2592             [(match_operand:VNx8HF 1 "register_operand")
2593              (unspec:VNx8HF
2594                [(match_operand:VNx8HF 2 "register_operand")
2595                 (match_operand:SI 3 "const_int_operand")]
2596                UNSPEC_SVE_LANE_SELECT)]
2597             UNSPEC_FDOT)
2598           (match_operand:VNx4SF 4 "register_operand")))]
2599   "TARGET_SVE2p1_OR_SME2"
2600   {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
2601      [ w        , w , y , 0 ; *              ] fdot\t%0.s, %1.h, %2.h[%3]
2602      [ ?&w      , w , y , w ; yes            ] movprfx\t%0, %4\;fdot\t%0.s, %1.h, %2.h[%3]
2603   }
2604 )
2605
2606 ;; =========================================================================
2607 ;; == Narrowing arithnetic
2608 ;; =========================================================================
2609
2610 ;; -------------------------------------------------------------------------
2611 ;; ---- [INT] Narrowing unary arithmetic
2612 ;; -------------------------------------------------------------------------
2613 ;; Includes:
2614 ;; - SQXTNB
2615 ;; - SQXTNT
2616 ;; - SQXTUNB
2617 ;; - SQXTUNT
2618 ;; - UQXTNB
2619 ;; - UQXTNT
2620 ;; -------------------------------------------------------------------------
2621
2622 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2623   [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
2624         (unspec:<VNARROW>
2625           [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
2626           SVE2_INT_UNARY_NARROWB))]
2627   "TARGET_SVE2"
2628   "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
2629 )
2630
2631 ;; These instructions do not take MOVPRFX.
2632 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2633   [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
2634         (unspec:<VNARROW>
2635           [(match_operand:<VNARROW> 1 "register_operand" "0")
2636            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
2637           SVE2_INT_UNARY_NARROWT))]
2638   "TARGET_SVE2"
2639   "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
2640 )
2641
2642 ;; -------------------------------------------------------------------------
2643 ;; ---- [INT] Multi-vector narrowing unary arithmetic
2644 ;; -------------------------------------------------------------------------
2645 ;; Includes:
2646 ;; - SQCVT (SME2)
2647 ;; - SQCVTN (SME2)
2648 ;; - UQCVT (SME2)
2649 ;; - UQCVTN (SME2)
2650 ;; -------------------------------------------------------------------------
2651
2652 (define_insn "@aarch64_sve_<optab><VNx16QI_ONLY:mode><VNx16SI_ONLY:mode>"
2653   [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
2654         (unspec:VNx16QI_ONLY
2655           [(match_operand:VNx16SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
2656           SVE_QCVTxN))]
2657   "TARGET_STREAMING_SME2"
2658   "<optab>\t%0.b, %1"
2659 )
2660
2661 (define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8SI_ONLY:mode>"
2662   [(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w")
2663         (unspec:VNx8HI_ONLY
2664           [(match_operand:VNx8SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
2665           SVE_QCVTxN))]
2666   ""
2667   "<optab>\t%0.h, %1"
2668 )
2669
2670 (define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8DI_ONLY:mode>"
2671   [(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w")
2672         (unspec:VNx8HI_ONLY
2673           [(match_operand:VNx8DI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
2674           SVE_QCVTxN))]
2675   "TARGET_STREAMING_SME2"
2676   "<optab>\t%0.h, %1"
2677 )
2678
2679 ;; -------------------------------------------------------------------------
2680 ;; ---- [INT] Narrowing binary arithmetic
2681 ;; -------------------------------------------------------------------------
2682 ;; Includes:
2683 ;; - ADDHNB
2684 ;; - ADDHNT
2685 ;; - RADDHNB
2686 ;; - RADDHNT
2687 ;; - RSUBHNB
2688 ;; - RSUBHNT
2689 ;; - SUBHNB
2690 ;; - SUBHNT
2691 ;; -------------------------------------------------------------------------
2692
2693 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2694   [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
2695         (unspec:<VNARROW>
2696           [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
2697            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
2698           SVE2_INT_BINARY_NARROWB))]
2699   "TARGET_SVE2"
2700   "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
2701 )
2702
2703 ;; These instructions do not take MOVPRFX.
2704 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2705   [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
2706         (unspec:<VNARROW>
2707           [(match_operand:<VNARROW> 1 "register_operand" "0")
2708            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
2709            (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
2710           SVE2_INT_BINARY_NARROWT))]
2711   "TARGET_SVE2"
2712   "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
2713 )
2714
2715 ;; Optimize ((a + b) >> n) where n is half the bitsize of the vector
2716 (define_insn "*bitmask_shift_plus<mode>"
2717   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2718         (unspec:SVE_FULL_HSDI
2719            [(match_operand:<VPRED> 1)
2720             (lshiftrt:SVE_FULL_HSDI
2721               (plus:SVE_FULL_HSDI
2722                 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
2723                 (match_operand:SVE_FULL_HSDI 3 "register_operand" "w"))
2724               (match_operand:SVE_FULL_HSDI 4
2725                  "aarch64_simd_shift_imm_vec_exact_top" ""))]
2726           UNSPEC_PRED_X))]
2727   "TARGET_SVE2"
2728   "addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
2729 )
2730
2731 ;; -------------------------------------------------------------------------
2732 ;; ---- [INT] Narrowing right shifts
2733 ;; -------------------------------------------------------------------------
2734 ;; Includes:
2735 ;; - RSHRNB
2736 ;; - RSHRNT
2737 ;; - SHRNB
2738 ;; - SHRNT
2739 ;; - SQRSHRNB
2740 ;; - SQRSHRNT
2741 ;; - SQRSHRUNB
2742 ;; - SQRSHRUNT
2743 ;; - SQSHRNB
2744 ;; - SQSHRNT
2745 ;; - SQSHRUNB
2746 ;; - SQSHRUNT
2747 ;; - UQRSHRNB
2748 ;; - UQRSHRNT
2749 ;; - UQSHRNB
2750 ;; - UQSHRNT
2751 ;; -------------------------------------------------------------------------
2752
2753 ;; The immediate range is enforced before generating the instruction.
2754 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2755   [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
2756         (unspec:<VNARROW>
2757           [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
2758            (match_operand:DI 2 "const_int_operand")]
2759           SVE2_INT_SHIFT_IMM_NARROWB))]
2760   "TARGET_SVE2"
2761   "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
2762 )
2763
2764 ;; The immediate range is enforced before generating the instruction.
2765 ;; These instructions do not take MOVPRFX.
2766 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2767   [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
2768         (unspec:<VNARROW>
2769           [(match_operand:<VNARROW> 1 "register_operand" "0")
2770            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
2771            (match_operand:DI 3 "const_int_operand")]
2772           SVE2_INT_SHIFT_IMM_NARROWT))]
2773   "TARGET_SVE2"
2774   "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
2775 )
2776
2777 ;; -------------------------------------------------------------------------
2778 ;; ---- [INT] Multi-vector narrowing right shifts
2779 ;; -------------------------------------------------------------------------
2780 ;; Includes:
2781 ;; - SQRSHR (SME2)
2782 ;; - SQRSHRN (SVE2p1, SME2)
2783 ;; - SQRSHRU (SME2)
2784 ;; - SQRSHRUN (SVE2p1, SME2)
2785 ;; - UQRSHR (SME2)
2786 ;; - UQRSHRN (SVE2p1, SME2)
2787 ;; -------------------------------------------------------------------------
2788
2789 (define_insn "@aarch64_sve_<sve_int_op><mode>"
2790   [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
2791         (unspec:<VNARROW>
2792           [(match_operand:SVE_FULL_SIx2_SDIx4 1 "register_operand" "Uw<vector_count>")
2793            (match_operand:DI 2 "const_int_operand")]
2794           SVE2_INT_SHIFT_IMM_NARROWxN))]
2795   "(<MODE>mode == VNx8SImode || TARGET_STREAMING_SME2)"
2796   "<sve_int_op>\t%0.<Ventype>, %1, #%2"
2797 )
2798
2799 ;; =========================================================================
2800 ;; == Pairwise arithmetic
2801 ;; =========================================================================
2802
2803 ;; -------------------------------------------------------------------------
2804 ;; ---- [INT] Pairwise arithmetic
2805 ;; -------------------------------------------------------------------------
2806 ;; Includes:
2807 ;; - ADDP
2808 ;; - SMAXP
2809 ;; - SMINP
2810 ;; - UMAXP
2811 ;; - UMINP
2812 ;; -------------------------------------------------------------------------
2813
2814 (define_insn "@aarch64_pred_<sve_int_op><mode>"
2815   [(set (match_operand:SVE_FULL_I 0 "register_operand")
2816         (unspec:SVE_FULL_I
2817           [(match_operand:<VPRED> 1 "register_operand")
2818            (match_operand:SVE_FULL_I 2 "register_operand")
2819            (match_operand:SVE_FULL_I 3 "register_operand")]
2820           SVE2_INT_BINARY_PAIR))]
2821   "TARGET_SVE2"
2822   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
2823      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2824      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2825   }
2826 )
2827
2828 ;; -------------------------------------------------------------------------
2829 ;; ---- [FP] Pairwise arithmetic
2830 ;; -------------------------------------------------------------------------
2831 ;; Includes:
2832 ;; - FADDP
2833 ;; - FMAXP
2834 ;; - FMAXNMP
2835 ;; - FMINP
2836 ;; - FMINNMP
2837 ;; -------------------------------------------------------------------------
2838
2839 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
2840   [(set (match_operand:SVE_FULL_F 0 "register_operand")
2841         (unspec:SVE_FULL_F
2842           [(match_operand:<VPRED> 1 "register_operand")
2843            (match_operand:SVE_FULL_F 2 "register_operand")
2844            (match_operand:SVE_FULL_F 3 "register_operand")]
2845           SVE2_FP_BINARY_PAIR))]
2846   "TARGET_SVE2"
2847   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
2848      [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2849      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2850   }
2851 )
2852
2853 ;; -------------------------------------------------------------------------
2854 ;; ---- [INT] Pairwise arithmetic with accumulation
2855 ;; -------------------------------------------------------------------------
2856 ;; Includes:
2857 ;; - SADALP
2858 ;; - UADALP
2859 ;; -------------------------------------------------------------------------
2860
2861 ;; Predicated pairwise absolute difference and accumulate with merging.
2862 (define_expand "@cond_<sve_int_op><mode>"
2863   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
2864         (unspec:SVE_FULL_HSDI
2865           [(match_operand:<VPRED> 1 "register_operand")
2866            (unspec:SVE_FULL_HSDI
2867              [(match_dup 1)
2868               (match_operand:SVE_FULL_HSDI 2 "register_operand")
2869               (match_operand:<VNARROW> 3 "register_operand")]
2870              SVE2_INT_BINARY_PAIR_LONG)
2871            (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
2872           UNSPEC_SEL))]
2873   "TARGET_SVE2"
2874 {
2875   /* Only target code is aware of these operations, so we don't need
2876      to handle the fully-general case.  */
2877   gcc_assert (rtx_equal_p (operands[2], operands[4])
2878               || CONSTANT_P (operands[4]));
2879 })
2880
2881 ;; Predicated pairwise absolute difference and accumulate, merging with
2882 ;; the first input.
2883 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
2884   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
2885         (unspec:SVE_FULL_HSDI
2886           [(match_operand:<VPRED> 1 "register_operand")
2887            (unspec:SVE_FULL_HSDI
2888              [(match_operand 4)
2889               (match_operand:SVE_FULL_HSDI 2 "register_operand")
2890               (match_operand:<VNARROW> 3 "register_operand")]
2891              SVE2_INT_BINARY_PAIR_LONG)
2892            (match_dup 2)]
2893           UNSPEC_SEL))]
2894   "TARGET_SVE2"
2895   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
2896      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
2897      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
2898   }
2899   "&& !CONSTANT_P (operands[4])"
2900   {
2901     operands[4] = CONSTM1_RTX (<VPRED>mode);
2902   }
2903 )
2904
2905 ;; Predicated pairwise absolute difference and accumulate, merging with zero.
2906 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
2907   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
2908         (unspec:SVE_FULL_HSDI
2909           [(match_operand:<VPRED> 1 "register_operand")
2910            (unspec:SVE_FULL_HSDI
2911              [(match_operand 5)
2912               (match_operand:SVE_FULL_HSDI 2 "register_operand")
2913               (match_operand:<VNARROW> 3 "register_operand")]
2914              SVE2_INT_BINARY_PAIR_LONG)
2915            (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
2916           UNSPEC_SEL))]
2917   "TARGET_SVE2"
2918   {@ [ cons: =0 , 1   , 2 , 3  ]
2919      [ &w       , Upl , 0 , w  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
2920      [ &w       , Upl , w , w  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
2921   }
2922   "&& !CONSTANT_P (operands[5])"
2923   {
2924     operands[5] = CONSTM1_RTX (<VPRED>mode);
2925   }
2926   [(set_attr "movprfx" "yes")]
2927 )
2928
2929 ;; -------------------------------------------------------------------------
2930 ;; -- [FP] Absolute maximum and minimum
2931 ;; -------------------------------------------------------------------------
2932 ;; Includes:
2933 ;; - FAMAX
2934 ;; - FAMIN
2935 ;; -------------------------------------------------------------------------
2936 ;; Predicated floating-point absolute maximum and minimum.
2937 (define_insn_and_rewrite "*aarch64_pred_faminmax_fused"
2938   [(set (match_operand:SVE_FULL_F 0 "register_operand")
2939         (unspec:SVE_FULL_F
2940           [(match_operand:<VPRED> 1 "register_operand")
2941            (match_operand:SI 4 "aarch64_sve_gp_strictness")
2942            (unspec:SVE_FULL_F
2943              [(match_operand 5)
2944               (const_int SVE_RELAXED_GP)
2945               (match_operand:SVE_FULL_F 2 "register_operand")]
2946              UNSPEC_COND_FABS)
2947            (unspec:SVE_FULL_F
2948              [(match_operand 6)
2949               (const_int SVE_RELAXED_GP)
2950               (match_operand:SVE_FULL_F 3 "register_operand")]
2951              UNSPEC_COND_FABS)]
2952           SVE_COND_SMAXMIN))]
2953   "TARGET_FAMINMAX && TARGET_SVE2_OR_SME2"
2954   {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
2955      [ w        , Upl , %0 , w ; *              ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2956      [ ?&w      , Upl , w  , w ; yes            ] movprfx\t%0, %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
2957   }
2958   "&& (!rtx_equal_p (operands[1], operands[5])
2959        || !rtx_equal_p (operands[1], operands[6]))"
2960   {
2961     operands[5] = copy_rtx (operands[1]);
2962     operands[6] = copy_rtx (operands[1]);
2963   }
2964 )
2965
2966 ;; =========================================================================
2967 ;; == Complex arithmetic
2968 ;; =========================================================================
2969
2970 ;; -------------------------------------------------------------------------
2971 ;; ---- [INT] Complex binary operations
2972 ;; -------------------------------------------------------------------------
2973 ;; Includes:
2974 ;; - CADD
2975 ;; - SQCADD
2976 ;; -------------------------------------------------------------------------
2977
2978 (define_insn "@aarch64_sve_<optab><mode>"
2979   [(set (match_operand:SVE_FULL_I 0 "register_operand")
2980         (unspec:SVE_FULL_I
2981           [(match_operand:SVE_FULL_I 1 "register_operand")
2982            (match_operand:SVE_FULL_I 2 "register_operand")]
2983           SVE2_INT_CADD))]
2984   "TARGET_SVE2"
2985   {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
2986      [ w        , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
2987      [ ?&w      , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
2988   }
2989 )
2990
2991 ;; unpredicated optab pattern for auto-vectorizer
2992 (define_expand "cadd<rot><mode>3"
2993   [(set (match_operand:SVE_FULL_I 0 "register_operand")
2994         (unspec:SVE_FULL_I
2995           [(match_operand:SVE_FULL_I 1 "register_operand")
2996            (match_operand:SVE_FULL_I 2 "register_operand")]
2997           SVE2_INT_CADD_OP))]
2998   "TARGET_SVE2"
2999 )
3000
3001 ;; -------------------------------------------------------------------------
3002 ;; ---- [INT] Complex ternary operations
3003 ;; -------------------------------------------------------------------------
3004 ;; Includes:
3005 ;; - CMLA
3006 ;; - SQRDCMLA
3007 ;; -------------------------------------------------------------------------
3008
3009 (define_insn "@aarch64_sve_<optab><mode>"
3010   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3011         (unspec:SVE_FULL_I
3012           [(match_operand:SVE_FULL_I 1 "register_operand")
3013            (match_operand:SVE_FULL_I 2 "register_operand")
3014            (match_operand:SVE_FULL_I 3 "register_operand")]
3015           SVE2_INT_CMLA))]
3016   "TARGET_SVE2"
3017   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3018      [ w        , 0 , w , w ; *              ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
3019      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
3020   }
3021 )
3022
3023 (define_insn "@aarch64_<optab>_lane_<mode>"
3024   [(set (match_operand:SVE_FULL_HSI 0 "register_operand")
3025         (unspec:SVE_FULL_HSI
3026           [(match_operand:SVE_FULL_HSI 1 "register_operand")
3027            (match_operand:SVE_FULL_HSI 2 "register_operand")
3028            (unspec:SVE_FULL_HSI
3029              [(match_operand:SVE_FULL_HSI 3 "register_operand")
3030               (match_operand:SI 4 "const_int_operand")]
3031              UNSPEC_SVE_LANE_SELECT)]
3032           SVE2_INT_CMLA))]
3033   "TARGET_SVE2"
3034   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
3035      [ w        , 0 , w , <sve_lane_con> ; *              ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
3036      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
3037   }
3038 )
3039
3040 ;; unpredicated optab pattern for auto-vectorizer
3041 ;; The complex mla/mls operations always need to expand to two instructions.
3042 ;; The first operation does half the computation and the second does the
3043 ;; remainder.  Because of this, expand early.
3044 (define_expand "cml<fcmac1><conj_op><mode>4"
3045   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3046         (plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand")
3047           (unspec:SVE_FULL_I
3048             [(match_operand:SVE_FULL_I 2 "register_operand")
3049              (match_operand:SVE_FULL_I 3 "register_operand")]
3050             SVE2_INT_CMLA_OP)))]
3051   "TARGET_SVE2"
3052 {
3053   rtx tmp = gen_reg_rtx (<MODE>mode);
3054   emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, operands[1],
3055                                                    operands[3], operands[2]));
3056   emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
3057                                                    operands[3], operands[2]));
3058   DONE;
3059 })
3060
3061 ;; unpredicated optab pattern for auto-vectorizer
3062 ;; The complex mul operations always need to expand to two instructions.
3063 ;; The first operation does half the computation and the second does the
3064 ;; remainder.  Because of this, expand early.
3065 (define_expand "cmul<conj_op><mode>3"
3066   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3067         (unspec:SVE_FULL_I
3068           [(match_operand:SVE_FULL_I 1 "register_operand")
3069            (match_operand:SVE_FULL_I 2 "register_operand")]
3070           SVE2_INT_CMUL_OP))]
3071   "TARGET_SVE2"
3072 {
3073   rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
3074   rtx tmp = gen_reg_rtx (<MODE>mode);
3075   emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, accum,
3076                                                    operands[2], operands[1]));
3077   emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
3078                                                    operands[2], operands[1]));
3079   DONE;
3080 })
3081
3082 ;; -------------------------------------------------------------------------
3083 ;; ---- [INT] Complex dot product
3084 ;; -------------------------------------------------------------------------
3085 ;; Includes:
3086 ;; - CDOT
3087 ;; -------------------------------------------------------------------------
3088
3089 (define_insn "@aarch64_sve_<optab><mode>"
3090   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3091         (unspec:SVE_FULL_SDI
3092           [(match_operand:SVE_FULL_SDI 1 "register_operand")
3093            (match_operand:<VSI2QI> 2 "register_operand")
3094            (match_operand:<VSI2QI> 3 "register_operand")]
3095           SVE2_INT_CDOT))]
3096   "TARGET_SVE2"
3097   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3098      [ w        , 0 , w , w ; *              ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
3099      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
3100   }
3101 )
3102
3103 (define_insn "@aarch64_<optab>_lane_<mode>"
3104   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3105         (unspec:SVE_FULL_SDI
3106           [(match_operand:SVE_FULL_SDI 1 "register_operand")
3107            (match_operand:<VSI2QI> 2 "register_operand")
3108            (unspec:<VSI2QI>
3109              [(match_operand:<VSI2QI> 3 "register_operand")
3110               (match_operand:SI 4 "const_int_operand")]
3111              UNSPEC_SVE_LANE_SELECT)]
3112           SVE2_INT_CDOT))]
3113   "TARGET_SVE2"
3114   {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
3115      [ w        , 0 , w , <sve_lane_con> ; *              ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
3116      [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
3117   }
3118 )
3119
3120 ;; =========================================================================
3121 ;; == Conversions
3122 ;; =========================================================================
3123
3124 ;; -------------------------------------------------------------------------
3125 ;; ---- [FP<-FP] Widening conversions
3126 ;; -------------------------------------------------------------------------
3127 ;; Includes:
3128 ;; - BF1CVT (FP8)
3129 ;; - BF1CVTLT (FP8)
3130 ;; - BF2CVT (FP8)
3131 ;; - BF2CVTLT (FP8)
3132 ;; - F1CVT (FP8)
3133 ;; - F1CVTLT (FP8)
3134 ;; - F2CVT (FP8)
3135 ;; - F2CVTLT (FP8)
3136 ;; - FCVTLT
3137 ;; -------------------------------------------------------------------------
3138
3139 ;; Predicated convert long top.
3140 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
3141   [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
3142         (unspec:SVE_FULL_SDF
3143           [(match_operand:<VPRED> 1 "register_operand" "Upl")
3144            (match_operand:SI 3 "aarch64_sve_gp_strictness")
3145            (match_operand:<VNARROW> 2 "register_operand" "0")]
3146           SVE2_COND_FP_UNARY_LONG))]
3147   "TARGET_SVE2"
3148   "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Ventype>"
3149 )
3150
3151 ;; Predicated convert long top with merging.
3152 (define_expand "@cond_<sve_fp_op><mode>"
3153   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3154         (unspec:SVE_FULL_SDF
3155           [(match_operand:<VPRED> 1 "register_operand")
3156            (unspec:SVE_FULL_SDF
3157              [(match_dup 1)
3158               (const_int SVE_STRICT_GP)
3159               (match_operand:<VNARROW> 2 "register_operand")]
3160              SVE2_COND_FP_UNARY_LONG)
3161            (match_operand:SVE_FULL_SDF 3 "register_operand")]
3162           UNSPEC_SEL))]
3163   "TARGET_SVE2"
3164 )
3165
3166 ;; These instructions do not take MOVPRFX.
3167 (define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
3168   [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
3169         (unspec:SVE_FULL_SDF
3170           [(match_operand:<VPRED> 1 "register_operand" "Upl")
3171            (unspec:SVE_FULL_SDF
3172              [(match_operand 4)
3173               (const_int SVE_RELAXED_GP)
3174               (match_operand:<VNARROW> 2 "register_operand" "w")]
3175              SVE2_COND_FP_UNARY_LONG)
3176            (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
3177           UNSPEC_SEL))]
3178   "TARGET_SVE2"
3179   "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
3180   "&& !rtx_equal_p (operands[1], operands[4])"
3181   {
3182     operands[4] = copy_rtx (operands[1]);
3183   }
3184 )
3185
3186 (define_insn "*cond_<sve_fp_op><mode>_strict"
3187   [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
3188         (unspec:SVE_FULL_SDF
3189           [(match_operand:<VPRED> 1 "register_operand" "Upl")
3190            (unspec:SVE_FULL_SDF
3191              [(match_dup 1)
3192               (const_int SVE_STRICT_GP)
3193               (match_operand:<VNARROW> 2 "register_operand" "w")]
3194              SVE2_COND_FP_UNARY_LONG)
3195            (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
3196           UNSPEC_SEL))]
3197   "TARGET_SVE2"
3198   "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
3199 )
3200
3201 (define_insn "@aarch64_sve2_fp8_cvt_<fp8_cvt_uns_op><mode>"
3202   [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
3203         (unspec:SVE_FULL_HF
3204           [(match_operand:VNx16QI 1 "register_operand" "w")
3205           (reg:DI FPM_REGNUM)]
3206           FP8CVT_UNS))]
3207   "TARGET_SSVE_FP8"
3208   "<b><fp8_cvt_uns_op>\t%0.h, %1.b"
3209 )
3210
3211 ;; -------------------------------------------------------------------------
3212 ;; ---- [FP<-FP] Narrowing conversions
3213 ;; -------------------------------------------------------------------------
3214 ;; Includes:
3215 ;; - FCVTNT
3216 ;; - FCVTX
3217 ;; - FCVTXNT
3218 ;; -------------------------------------------------------------------------
3219
3220 ;; Predicated FCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
3221 ;; pair because the even elements always have to be supplied for active
3222 ;; elements, even if the inactive elements don't matter.
3223 ;;
3224 ;; These instructions do not take MOVPRFX.
3225 (define_insn "@aarch64_sve_cvtnt<mode>"
3226   [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
3227         (unspec:SVE_FULL_HSF
3228           [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
3229            (const_int SVE_STRICT_GP)
3230            (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
3231            (match_operand:<VWIDE> 3 "register_operand" "w")]
3232           UNSPEC_COND_FCVTNT))]
3233   "TARGET_SVE2"
3234   "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
3235 )
3236
3237 ;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
3238 ;; it supports MOVPRFX).
3239 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
3240   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
3241         (unspec:VNx4SF_ONLY
3242           [(match_operand:<VWIDE_PRED> 1 "register_operand")
3243            (match_operand:SI 3 "aarch64_sve_gp_strictness")
3244            (match_operand:<VWIDE> 2 "register_operand")]
3245           SVE2_COND_FP_UNARY_NARROWB))]
3246   "TARGET_SVE2"
3247   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3248      [ w        , Upl , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
3249      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
3250   }
3251 )
3252
3253 ;; Predicated FCVTX with merging.
3254 (define_expand "@cond_<sve_fp_op><mode>"
3255   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
3256         (unspec:VNx4SF_ONLY
3257           [(match_operand:<VWIDE_PRED> 1 "register_operand")
3258            (unspec:VNx4SF_ONLY
3259              [(match_dup 1)
3260               (const_int SVE_STRICT_GP)
3261               (match_operand:<VWIDE> 2 "register_operand")]
3262              SVE2_COND_FP_UNARY_NARROWB)
3263            (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
3264           UNSPEC_SEL))]
3265   "TARGET_SVE2"
3266 )
3267
3268 (define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed"
3269   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
3270         (unspec:VNx4SF_ONLY
3271           [(match_operand:<VWIDE_PRED> 1 "register_operand")
3272            (unspec:VNx4SF_ONLY
3273              [(match_operand 4)
3274               (const_int SVE_RELAXED_GP)
3275               (match_operand:<VWIDE> 2 "register_operand")]
3276              SVE2_COND_FP_UNARY_NARROWB)
3277            (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
3278           UNSPEC_SEL))]
3279   "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
3280   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3281      [ &w       , Upl , w , 0  ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
3282      [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
3283      [ &w       , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
3284   }
3285   "&& !rtx_equal_p (operands[1], operands[4])"
3286   {
3287     operands[4] = copy_rtx (operands[1]);
3288   }
3289 )
3290
3291 (define_insn "*cond_<sve_fp_op><mode>_any_strict"
3292   [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
3293         (unspec:VNx4SF_ONLY
3294           [(match_operand:<VWIDE_PRED> 1 "register_operand")
3295            (unspec:VNx4SF_ONLY
3296              [(match_dup 1)
3297               (const_int SVE_STRICT_GP)
3298               (match_operand:<VWIDE> 2 "register_operand")]
3299              SVE2_COND_FP_UNARY_NARROWB)
3300            (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
3301           UNSPEC_SEL))]
3302   "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
3303   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3304      [ &w       , Upl , w , 0  ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
3305      [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
3306      [ &w       , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
3307   }
3308 )
3309
3310 ;; Predicated FCVTXNT.  This doesn't give a natural aarch64_pred_*/cond_*
3311 ;; pair because the even elements always have to be supplied for active
3312 ;; elements, even if the inactive elements don't matter.
3313 ;;
3314 ;; These instructions do not take MOVPRFX.
3315 (define_insn "@aarch64_sve2_cvtxnt<mode>"
3316   [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
3317         (unspec:<VNARROW>
3318           [(match_operand:<VPRED> 2 "register_operand" "Upl")
3319            (const_int SVE_STRICT_GP)
3320            (match_operand:<VNARROW> 1 "register_operand" "0")
3321            (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
3322           UNSPEC_COND_FCVTXNT))]
3323   "TARGET_SVE2"
3324   "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
3325 )
3326
3327 ;; -------------------------------------------------------------------------
3328 ;; ---- [FP<-FP] Multi-vector widening conversions
3329 ;; -------------------------------------------------------------------------
3330 ;; Includes the multi-register forms of:
3331 ;; - FCVT (SME_F16F16)
3332 ;; - FCVTL (SME_F16F16)
3333 ;; -------------------------------------------------------------------------
3334
3335 (define_insn "extendvnx8hfvnx8sf2"
3336   [(set (match_operand:VNx8SF 0 "aligned_register_operand" "=Uw2")
3337         (float_extend:VNx8SF
3338           (match_operand:VNx8HF 1 "register_operand" "w")))]
3339   "TARGET_STREAMING_SME_F16F16"
3340   "fcvt\t%0, %1.h"
3341 )
3342
3343 (define_insn "@aarch64_sve_cvtl<mode>"
3344   [(set (match_operand:VNx8SF_ONLY 0 "aligned_register_operand" "=Uw2")
3345         (unspec:VNx8SF_ONLY
3346           [(match_operand:VNx8HF 1 "register_operand" "w")]
3347           UNSPEC_FCVTL))]
3348   "TARGET_STREAMING_SME_F16F16"
3349   "fcvtl\t%0, %1.h"
3350 )
3351
3352 ;; -------------------------------------------------------------------------
3353 ;; ---- [FP<-FP] Multi-vector narrowing conversions
3354 ;; -------------------------------------------------------------------------
3355 ;; Includes the multi-register forms of:
3356 ;; - BFCVT (SME2)
3357 ;; - BFCVTN (SME2)
3358 ;; - FCVT (SME2)
3359 ;; - FCVTN (SME2)
3360 ;; - FCVTNB (FP8)
3361 ;; - FCVTNT (FP8)
3362 ;; -------------------------------------------------------------------------
3363
3364 (define_insn "truncvnx8sf<mode>2"
3365   [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
3366         (float_truncate:SVE_FULL_HF
3367           (match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")))]
3368   "TARGET_STREAMING_SME2"
3369   "<b>fcvt\t%0.h, %1"
3370 )
3371
3372 (define_insn "@aarch64_sve_cvtn<mode>"
3373   [(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
3374         (unspec:SVE_FULL_HF
3375           [(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")]
3376           UNSPEC_FCVTN))]
3377   "TARGET_STREAMING_SME2"
3378   "<b>fcvtn\t%0.h, %1"
3379 )
3380
3381 (define_insn "@aarch64_sve2_fp8_cvtn<mode>"
3382   [(set (match_operand:VNx16QI 0 "register_operand" "=w")
3383         (unspec:VNx16QI
3384           [(match_operand:SVE_FULL_HFx2 1 "aligned_register_operand" "Uw2")
3385            (reg:DI FPM_REGNUM)]
3386           UNSPEC_FP8FCVTN))]
3387   "TARGET_SSVE_FP8"
3388   "<b>fcvtn\t%0.b, %1"
3389 )
3390
3391 (define_insn "@aarch64_sve2_fp8_cvtnb<mode>"
3392   [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
3393         (unspec:VNx16QI_ONLY
3394           [(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")
3395            (reg:DI FPM_REGNUM)]
3396           UNSPEC_FCVTNB))]
3397   "TARGET_SSVE_FP8"
3398   "fcvtnb\t%0.b, %1"
3399 )
3400
3401 (define_insn "@aarch64_sve_cvtnt<mode>"
3402   [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
3403         (unspec:VNx16QI_ONLY
3404           [(match_operand:VNx16QI_ONLY 1 "register_operand" "0")
3405            (match_operand:VNx8SF 2 "aligned_register_operand" "Uw2")
3406            (reg:DI FPM_REGNUM)]
3407           UNSPEC_FCVTNT))]
3408   "TARGET_SSVE_FP8"
3409   "fcvtnt\t%0.b, %2"
3410 )
3411
3412 ;; -------------------------------------------------------------------------
3413 ;; ---- [FP<-INT] Multi-vector conversions
3414 ;; -------------------------------------------------------------------------
3415 ;; Includes the multi-register forms of:
3416 ;; - SCVTF (SME2)
3417 ;; - UCVTF (SME2)
3418 ;; -------------------------------------------------------------------------
3419
3420 (define_insn "<optab><v_int_equiv><mode>2"
3421   [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
3422         (FLOATUORS:SVE_SFx24
3423           (match_operand:<V_INT_EQUIV> 1 "aligned_register_operand" "Uw<vector_count>")))]
3424   "TARGET_STREAMING_SME2"
3425   "<su_optab>cvtf\t%0, %1"
3426 )
3427
3428 ;; -------------------------------------------------------------------------
3429 ;; ---- [INT<-FP] Multi-vector conversions
3430 ;; -------------------------------------------------------------------------
3431 ;; Includes the multi-register forms of:
3432 ;; - FCVTZS (SME2)
3433 ;; - FCVTZU (SME2)
3434 ;; -------------------------------------------------------------------------
3435
3436 (define_insn "<optab><mode><v_int_equiv>2"
3437   [(set (match_operand:<V_INT_EQUIV> 0 "aligned_register_operand" "=Uw<vector_count>")
3438         (FIXUORS:<V_INT_EQUIV>
3439           (match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")))]
3440   "TARGET_STREAMING_SME2"
3441   "fcvtz<su>\t%0, %1"
3442 )
3443
3444 ;; =========================================================================
3445 ;; == Other arithmetic
3446 ;; =========================================================================
3447
3448 ;; -------------------------------------------------------------------------
3449 ;; ---- [INT] Reciprocal approximation
3450 ;; -------------------------------------------------------------------------
3451 ;; Includes:
3452 ;; - URECPE
3453 ;; - URSQRTE
3454 ;; -------------------------------------------------------------------------
3455
3456 ;; Predicated integer unary operations.
3457 (define_insn "@aarch64_pred_<sve_int_op><mode>"
3458   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
3459         (unspec:VNx4SI_ONLY
3460           [(match_operand:<VPRED> 1 "register_operand")
3461            (unspec:VNx4SI_ONLY
3462              [(match_operand:VNx4SI_ONLY 2 "register_operand")]
3463              SVE2_U32_UNARY)]
3464           UNSPEC_PRED_X))]
3465   "TARGET_SVE2"
3466   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3467      [ w        , Upl , 0 ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3468      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3469   }
3470 )
3471
3472 ;; Predicated integer unary operations with merging.
3473 (define_expand "@cond_<sve_int_op><mode>"
3474   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
3475         (unspec:VNx4SI_ONLY
3476           [(match_operand:<VPRED> 1 "register_operand")
3477            (unspec:VNx4SI_ONLY
3478              [(match_dup 4)
3479               (unspec:VNx4SI_ONLY
3480                 [(match_operand:VNx4SI_ONLY 2 "register_operand")]
3481                 SVE2_U32_UNARY)]
3482              UNSPEC_PRED_X)
3483            (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
3484           UNSPEC_SEL))]
3485   "TARGET_SVE2"
3486   {
3487     operands[4] = CONSTM1_RTX (<MODE>mode);
3488   }
3489 )
3490
3491 (define_insn_and_rewrite "*cond_<sve_int_op><mode>"
3492   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
3493         (unspec:VNx4SI_ONLY
3494           [(match_operand:<VPRED> 1 "register_operand")
3495            (unspec:VNx4SI_ONLY
3496              [(match_operand 4)
3497               (unspec:VNx4SI_ONLY
3498                 [(match_operand:VNx4SI_ONLY 2 "register_operand")]
3499                 SVE2_U32_UNARY)]
3500              UNSPEC_PRED_X)
3501            (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
3502           UNSPEC_SEL))]
3503   "TARGET_SVE2"
3504   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3505      [ w        , Upl , w , 0  ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3506      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3507      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3508   }
3509   "&& !CONSTANT_P (operands[4])"
3510   {
3511     operands[4] = CONSTM1_RTX (<VPRED>mode);
3512   }
3513 )
3514
3515 ;; -------------------------------------------------------------------------
3516 ;; ---- [INT<-FP] Base-2 logarithm
3517 ;; -------------------------------------------------------------------------
3518 ;; Includes:
3519 ;; - FLOGB
3520 ;; -------------------------------------------------------------------------
3521
3522 ;; Predicated FLOGB.
3523 (define_insn "@aarch64_pred_<sve_fp_op><mode>"
3524   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3525         (unspec:<V_INT_EQUIV>
3526           [(match_operand:<VPRED> 1 "register_operand")
3527            (match_operand:SI 3 "aarch64_sve_gp_strictness")
3528            (match_operand:SVE_FULL_F 2 "register_operand")]
3529           SVE2_COND_INT_UNARY_FP))]
3530   "TARGET_SVE2"
3531   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3532      [ w        , Upl , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3533      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3534   }
3535 )
3536
3537 ;; Predicated FLOGB with merging.
3538 (define_expand "@cond_<sve_fp_op><mode>"
3539   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3540         (unspec:<V_INT_EQUIV>
3541           [(match_operand:<VPRED> 1 "register_operand")
3542            (unspec:<V_INT_EQUIV>
3543              [(match_dup 1)
3544               (const_int SVE_STRICT_GP)
3545               (match_operand:SVE_FULL_F 2 "register_operand")]
3546              SVE2_COND_INT_UNARY_FP)
3547            (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
3548           UNSPEC_SEL))]
3549   "TARGET_SVE2"
3550 )
3551
3552 (define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
3553   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3554         (unspec:<V_INT_EQUIV>
3555           [(match_operand:<VPRED> 1 "register_operand")
3556            (unspec:<V_INT_EQUIV>
3557              [(match_operand 4)
3558               (const_int SVE_RELAXED_GP)
3559               (match_operand:SVE_FULL_F 2 "register_operand")]
3560              SVE2_COND_INT_UNARY_FP)
3561            (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
3562           UNSPEC_SEL))]
3563   "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
3564   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3565      [ &w       , Upl , w , 0  ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3566      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3567      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3568   }
3569   "&& !rtx_equal_p (operands[1], operands[4])"
3570   {
3571     operands[4] = copy_rtx (operands[1]);
3572   }
3573 )
3574
3575 (define_insn "*cond_<sve_fp_op><mode>_strict"
3576   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
3577         (unspec:<V_INT_EQUIV>
3578           [(match_operand:<VPRED> 1 "register_operand")
3579            (unspec:<V_INT_EQUIV>
3580              [(match_dup 1)
3581               (const_int SVE_STRICT_GP)
3582               (match_operand:SVE_FULL_F 2 "register_operand")]
3583              SVE2_COND_INT_UNARY_FP)
3584            (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
3585           UNSPEC_SEL))]
3586   "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
3587   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3588      [ &w       , Upl , w , 0  ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3589      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3590      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3591   }
3592 )
3593
3594 ;; -------------------------------------------------------------------------
3595 ;; ---- [INT] Polynomial multiplication
3596 ;; -------------------------------------------------------------------------
3597 ;; Includes:
3598 ;; - PMUL
3599 ;; - PMULLB
3600 ;; - PMULLT
3601 ;; -------------------------------------------------------------------------
3602
3603 ;; Uniform PMUL.
3604 (define_insn "@aarch64_sve2_pmul<mode>"
3605   [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
3606         (unspec:VNx16QI_ONLY
3607           [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
3608            (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
3609           UNSPEC_PMUL))]
3610   "TARGET_SVE2"
3611   "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3612 )
3613
3614 ;; Extending PMUL, with the results modeled as wider vectors.
3615 ;; This representation is only possible for .H and .D, not .Q.
3616 (define_insn "@aarch64_sve_<optab><mode>"
3617   [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
3618         (unspec:SVE_FULL_HDI
3619           [(match_operand:<VNARROW> 1 "register_operand" "w")
3620            (match_operand:<VNARROW> 2 "register_operand" "w")]
3621           SVE2_PMULL))]
3622   "TARGET_SVE2"
3623   "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
3624 )
3625
3626 ;; Extending PMUL, with the results modeled as pairs of values.
3627 ;; This representation works for .H, .D and .Q, with .Q requiring
3628 ;; the AES extension.  (This is enforced by the mode iterator.)
3629 (define_insn "@aarch64_sve_<optab><mode>"
3630   [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
3631         (unspec:SVE2_PMULL_PAIR_I
3632           [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
3633            (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
3634           SVE2_PMULL_PAIR))]
3635   "TARGET_SVE2"
3636   "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
3637 )
3638
3639 ;; =========================================================================
3640 ;; == Comparisons and selects
3641 ;; =========================================================================
3642
3643 ;; -------------------------------------------------------------------------
3644 ;; ---- [INT,FP] Select based on predicates as counters
3645 ;; -------------------------------------------------------------------------
3646
3647 (define_insn "@aarch64_sve_sel<mode>"
3648   [(set (match_operand:SVE_FULLx24 0 "register_operand" "=Uw<vector_count>")
3649         (unspec:SVE_FULLx24
3650           [(match_operand:<VPRED> 3 "register_operand" "Uph")
3651            (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
3652            (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")]
3653           UNSPEC_SEL))]
3654   "TARGET_STREAMING_SME2"
3655   "sel\t%0, %K3, %1, %2"
3656 )
3657
3658 ;; -------------------------------------------------------------------------
3659 ;; ---- [INT] While tests
3660 ;; -------------------------------------------------------------------------
3661 ;; Includes the x2 and count versions of:
3662 ;; - WHILEGE
3663 ;; - WHILEGT
3664 ;; - WHILEHI
3665 ;; - WHILEHS
3666 ;; - WHILELE
3667 ;; - WHILELO
3668 ;; - WHILELS
3669 ;; - WHILELT
3670 ;; -------------------------------------------------------------------------
3671
3672 (define_insn "@aarch64_sve_while<while_optab_cmp>_b<BHSD_BITS>_x2"
3673   [(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
3674         (unspec:VNx32BI
3675           [(const_int SVE_WHILE_B_X2)
3676            (match_operand:DI 1 "aarch64_reg_or_zero" "rZ")
3677            (match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
3678            (const_int BHSD_BITS)]
3679           SVE_WHILE_ORDER))
3680    (clobber (reg:CC_NZC CC_REGNUM))]
3681   "TARGET_SVE2p1_OR_SME2"
3682   "while<cmp_op>\t{%S0.<bits_etype>, %T0.<bits_etype>}, %x1, %x2"
3683 )
3684
3685 (define_insn "@aarch64_sve_while<while_optab_cmp>_c<BHSD_BITS>"
3686   [(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
3687         (unspec:VNx16BI
3688           [(const_int SVE_WHILE_C)
3689            (match_operand:DI 1 "aarch64_reg_or_zero" "rZ")
3690            (match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
3691            (const_int BHSD_BITS)
3692            (match_operand:DI 3 "const_int_operand")]
3693           SVE_WHILE_ORDER))
3694    (clobber (reg:CC_NZC CC_REGNUM))]
3695   "TARGET_SVE2p1_OR_SME2"
3696   "while<cmp_op>\t%K0.<bits_etype>, %x1, %x2, vlx%3"
3697 )
3698
3699 ;; =========================================================================
3700 ;; == Reductions
3701 ;; =========================================================================
3702
3703 ;; -------------------------------------------------------------------------
3704 ;; ---- [INT] Reduction to 128-bit vector
3705 ;; -------------------------------------------------------------------------
3706 ;; Includes:
3707 ;; - ADDQV (SVE2p1)
3708 ;; - ANDQV (SVE2p1)
3709 ;; - EORQV (SVE2p1)
3710 ;; - ORQV (SVE2p1)
3711 ;; - SMAXQV (SVE2p1)
3712 ;; - SMINQV (SVE2p1)
3713 ;; - UMAXQV (SVE2p1)
3714 ;; - UMINQV (SVE2p1)
3715 ;; -------------------------------------------------------------------------
3716
3717 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
3718   [(set (match_operand:<V128> 0 "register_operand" "=w")
3719         (unspec:<V128>
3720           [(match_operand:<VPRED> 1 "register_operand" "Upl")
3721            (match_operand:SVE_FULL_I 2 "register_operand" "w")]
3722           SVE_INT_REDUCTION_128))]
3723   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
3724   "<optab>\t%0.<Vtype>, %1, %2.<Vetype>"
3725 )
3726
3727 ;; -------------------------------------------------------------------------
3728 ;; ---- [FP] Reduction to 128-bit vector
3729 ;; -------------------------------------------------------------------------
3730 ;; Includes:
3731 ;; - FADDQV (SVE2p1)
3732 ;; - FMAXNMQV (SVE2p1)
3733 ;; - FMAXQV (SVE2p1)
3734 ;; - FMINNMQV (SVE2p1)
3735 ;; - FMINQV (SVE2p1)
3736 ;; -------------------------------------------------------------------------
3737
3738 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
3739   [(set (match_operand:<V128> 0 "register_operand" "=w")
3740         (unspec:<V128>
3741           [(match_operand:<VPRED> 1 "register_operand" "Upl")
3742            (match_operand:SVE_FULL_F 2 "register_operand" "w")]
3743           SVE_FP_REDUCTION_128))]
3744   "TARGET_SVE2p1 && TARGET_NON_STREAMING"
3745   "<optab>\t%0.<Vtype>, %1, %2.<Vetype>"
3746 )
3747
3748 ;; =========================================================================
3749 ;; == Permutation
3750 ;; =========================================================================
3751
3752 ;; -------------------------------------------------------------------------
3753 ;; ---- [INT,FP] Reversal
3754 ;; -------------------------------------------------------------------------
3755 ;; Includes:
3756 ;; - REVD
3757 ;; -------------------------------------------------------------------------
3758
3759 (define_insn "@aarch64_pred_<optab><mode>"
3760   [(set (match_operand:SVE_FULL 0 "register_operand")
3761         (unspec:SVE_FULL
3762           [(match_operand:VNx2BI 1 "register_operand")
3763            (unspec:SVE_FULL
3764              [(match_operand:SVE_FULL 2 "register_operand")]
3765              UNSPEC_REVD_ONLY)]
3766           UNSPEC_PRED_X))]
3767   "TARGET_SVE2p1_OR_SME"
3768   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3769      [ w        , Upl , 0 ; *              ] revd\t%0.q, %1/m, %2.q
3770      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q
3771   }
3772 )
3773
3774 (define_insn "@cond_<optab><mode>"
3775   [(set (match_operand:SVE_FULL 0 "register_operand")
3776         (unspec:SVE_FULL
3777           [(match_operand:VNx2BI 1 "register_operand")
3778            (unspec:SVE_FULL
3779              [(match_operand:SVE_FULL 2 "register_operand")]
3780              UNSPEC_REVD_ONLY)
3781            (match_operand:SVE_FULL 3 "register_operand")]
3782           UNSPEC_SEL))]
3783   "TARGET_SVE2p1_OR_SME"
3784   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3785      [ w        , Upl , w , 0  ; *              ] revd\t%0.q, %1/m, %2.q
3786      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q
3787   }
3788 )
3789
3790 ;; -------------------------------------------------------------------------
3791 ;; ---- [INT,FP] HVLA permutes
3792 ;; -------------------------------------------------------------------------
3793 ;; Includes:
3794 ;; - DUPQ (SVE2p1)
3795 ;; - EXTQ (SVE2p1)
3796 ;; -------------------------------------------------------------------------
3797
3798 (define_insn "@aarch64_sve_dupq<mode>"
3799   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
3800         (unspec:SVE_FULL
3801           [(match_operand:SVE_FULL 1 "register_operand" "w")
3802            (match_operand:SI 2 "const_int_operand")]
3803           UNSPEC_DUPQ))]
3804   "TARGET_SVE2p1
3805    && TARGET_NON_STREAMING
3806    && IN_RANGE (INTVAL (operands[2]) * (<elem_bits> / 8), 0, 15)"
3807   "dupq\t%0.<Vetype>, %1.<Vetype>[%2]"
3808 )
3809
3810 (define_insn "@aarch64_sve_extq<mode>"
3811   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
3812         (unspec:SVE_FULL
3813           [(match_operand:SVE_FULL 1 "register_operand" "0, w")
3814            (match_operand:SVE_FULL 2 "register_operand" "w, w")
3815            (match_operand:SI 3 "const_int_operand")]
3816           UNSPEC_EXTQ))]
3817   "TARGET_SVE2p1
3818    && TARGET_NON_STREAMING
3819    && IN_RANGE (INTVAL (operands[3]) * (<elem_bits> / 8), 0, 15)"
3820   {
3821     operands[3] = GEN_INT (INTVAL (operands[3]) * (<elem_bits> / 8));
3822     return (which_alternative == 0
3823             ? "extq\\t%0.b, %0.b, %2.b, #%3"
3824             : "movprfx\t%0, %1\;extq\\t%0.b, %0.b, %2.b, #%3");
3825   }
3826   [(set_attr "movprfx" "*,yes")]
3827 )
3828
3829 ;; -------------------------------------------------------------------------
3830 ;; ---- [INT,FP] General permutes
3831 ;; -------------------------------------------------------------------------
3832 ;; Includes:
3833 ;; - TBL (vector pair form)
3834 ;; - TBX
3835 ;; - TBXQ (SVE2p1)
3836 ;; -------------------------------------------------------------------------
3837
3838 ;; TBL on a pair of data vectors.
3839 (define_insn "@aarch64_sve2_tbl2<mode>"
3840   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
3841         (unspec:SVE_FULL
3842           [(match_operand:<VDOUBLE> 1 "register_operand" "w")
3843            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
3844           UNSPEC_TBL2))]
3845   "TARGET_SVE2"
3846   "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
3847 )
3848
3849 ;; TBX(Q).  These instructions do not take MOVPRFX.
3850 (define_insn "@aarch64_sve_<perm_insn><mode>"
3851   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
3852         (unspec:SVE_FULL
3853           [(match_operand:SVE_FULL 1 "register_operand" "0")
3854            (match_operand:SVE_FULL 2 "register_operand" "w")
3855            (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
3856           SVE_TBX))]
3857   "TARGET_SVE2"
3858   "<perm_insn>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
3859 )
3860
3861 ;; -------------------------------------------------------------------------
3862 ;; ---- [INT,FP] Multi-register permutes
3863 ;; -------------------------------------------------------------------------
3864 ;; Includes:
3865 ;; - ZIP
3866 ;; - UZP
3867 ;; -------------------------------------------------------------------------
3868
3869 (define_insn "@aarch64_sve_<optab><mode>"
3870   [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2")
3871         (unspec:SVE_FULLx2
3872           [(match_operand:<VSINGLE> 1 "register_operand" "w")
3873            (match_operand:<VSINGLE> 2 "register_operand" "w")]
3874           SVE2_x24_PERMUTE))]
3875   "TARGET_STREAMING_SME2"
3876   "<perm_insn>\t%0, %1.<Vetype>, %2.<Vetype>"
3877 )
3878
3879 (define_insn "@aarch64_sve_<optab><mode>"
3880   [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2")
3881         (unspec:SVE_FULLx2
3882           [(match_operand:<VSINGLE> 1 "register_operand" "w")
3883            (match_operand:<VSINGLE> 2 "register_operand" "w")]
3884           SVE2_x24_PERMUTEQ))]
3885   "TARGET_STREAMING_SME2"
3886   "<perm_insn>\t{%S0.q - %T0.q}, %1.q, %2.q"
3887 )
3888
3889 (define_insn "@aarch64_sve_<optab><mode>"
3890   [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4")
3891         (unspec:SVE_FULLx4
3892           [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")]
3893           SVE2_x24_PERMUTE))]
3894   "TARGET_STREAMING_SME2"
3895   "<perm_insn>\t%0, %1"
3896 )
3897
3898 (define_insn "@aarch64_sve_<optab><mode>"
3899   [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4")
3900         (unspec:SVE_FULLx4
3901           [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")]
3902           SVE2_x24_PERMUTEQ))]
3903   "TARGET_STREAMING_SME2"
3904   "<perm_insn>\t{%S0.q - %V0.q}, {%S1.q - %V1.q}"
3905 )
3906
3907 ;; -------------------------------------------------------------------------
3908 ;; ---- [INT] Optional bit-permute extensions
3909 ;; -------------------------------------------------------------------------
3910 ;; Includes:
3911 ;; - BDEP
3912 ;; - BEXT
3913 ;; - BGRP
3914 ;; -------------------------------------------------------------------------
3915
3916 (define_insn "@aarch64_sve_<sve_int_op><mode>"
3917   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
3918         (unspec:SVE_FULL_I
3919           [(match_operand:SVE_FULL_I 1 "register_operand" "w")
3920            (match_operand:SVE_FULL_I 2 "register_operand" "w")]
3921           SVE2_INT_BITPERM))]
3922   "TARGET_SVE2_BITPERM"
3923   "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3924 )
3925
3926 ;; =========================================================================
3927 ;; == General
3928 ;; =========================================================================
3929
3930 ;; -------------------------------------------------------------------------
3931 ;; ---- Check for aliases between pointers
3932 ;; -------------------------------------------------------------------------
3933 ;; The patterns in this section are synthetic: WHILERW and WHILEWR are
3934 ;; defined in aarch64-sve.md instead.
3935 ;; -------------------------------------------------------------------------
3936
3937 ;; Use WHILERW and WHILEWR to accelerate alias checks.  This is only
3938 ;; possible if the accesses we're checking are exactly the same size
3939 ;; as an SVE vector.
3940 (define_expand "check_<raw_war>_ptrs<mode>"
3941   [(match_operand:GPI 0 "register_operand")
3942    (unspec:VNx16BI
3943      [(match_operand:GPI 1 "register_operand")
3944       (match_operand:GPI 2 "register_operand")
3945       (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
3946       (match_operand:GPI 4 "const_int_operand")]
3947      SVE2_WHILE_PTR)]
3948   "TARGET_SVE2"
3949 {
3950   /* Use the widest predicate mode we can.  */
3951   unsigned int align = INTVAL (operands[4]);
3952   if (align > 8)
3953     align = 8;
3954   machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();
3955
3956   /* Emit a WHILERW or WHILEWR, setting the condition codes based on
3957      the result.  */
3958   emit_insn (gen_while_ptest
3959              (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
3960               gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
3961               CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));
3962
3963   /* Set operand 0 to true if the last bit of the predicate result is set,
3964      i.e. if all elements are free of dependencies.  */
3965   rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
3966   rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
3967   emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
3968   DONE;
3969 })
3970
3971 ;; -------------------------------------------------------------------------
3972 ;; ---- Histogram processing
3973 ;; -------------------------------------------------------------------------
3974 ;; Includes:
3975 ;; - HISTCNT
3976 ;; - HISTSEG
3977 ;; -------------------------------------------------------------------------
3978
3979 (define_insn "@aarch64_sve2_histcnt<mode>"
3980   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3981         (unspec:SVE_FULL_SDI
3982           [(match_operand:<VPRED> 1 "register_operand" "Upl")
3983            (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
3984            (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
3985           UNSPEC_HISTCNT))]
3986   "TARGET_SVE2 && TARGET_NON_STREAMING"
3987   "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
3988 )
3989
3990 (define_insn "@aarch64_sve2_histseg<mode>"
3991   [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
3992         (unspec:VNx16QI_ONLY
3993           [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
3994            (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
3995           UNSPEC_HISTSEG))]
3996   "TARGET_SVE2 && TARGET_NON_STREAMING"
3997   "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3998 )
3999
4000 ;; -------------------------------------------------------------------------
4001 ;; ---- String matching
4002 ;; -------------------------------------------------------------------------
4003 ;; Includes:
4004 ;; - MATCH
4005 ;; - NMATCH
4006 ;; -------------------------------------------------------------------------
4007
4008 ;; Predicated string matching.
4009 (define_insn "@aarch64_pred_<sve_int_op><mode>"
4010   [(set (match_operand:<VPRED> 0 "register_operand")
4011         (unspec:<VPRED>
4012           [(match_operand:<VPRED> 1 "register_operand")
4013            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
4014            (unspec:<VPRED>
4015              [(match_operand:SVE_FULL_BHI 3 "register_operand")
4016               (match_operand:SVE_FULL_BHI 4 "register_operand")]
4017              SVE2_MATCH)]
4018           UNSPEC_PRED_Z))
4019    (clobber (reg:CC_NZC CC_REGNUM))]
4020   "TARGET_SVE2 && TARGET_NON_STREAMING"
4021   {@ [ cons: =0, 1  , 3, 4; attrs: pred_clobber ]
4022      [ &Upa    , Upl, w, w; yes                 ] <sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
4023      [ ?Upl    , 0  , w, w; yes                 ] ^
4024      [ Upa     , Upl, w, w; no                  ] ^
4025   }
4026 )
4027
4028 ;; Predicated string matching in which both the flag and predicate results
4029 ;; are interesting.
4030 (define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
4031   [(set (reg:CC_NZC CC_REGNUM)
4032         (unspec:CC_NZC
4033           [(match_operand:VNx16BI 1 "register_operand" "Upl")
4034            (match_operand 4)
4035            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
4036            (unspec:<VPRED>
4037              [(match_operand 6)
4038               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
4039               (unspec:<VPRED>
4040                 [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
4041                  (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
4042                 SVE2_MATCH)]
4043              UNSPEC_PRED_Z)]
4044           UNSPEC_PTEST))
4045    (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
4046         (unspec:<VPRED>
4047           [(match_dup 6)
4048            (match_dup 7)
4049            (unspec:<VPRED>
4050              [(match_dup 2)
4051               (match_dup 3)]
4052              SVE2_MATCH)]
4053           UNSPEC_PRED_Z))]
4054   "TARGET_SVE2
4055    && TARGET_NON_STREAMING
4056    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
4057   "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
4058   "&& !rtx_equal_p (operands[4], operands[6])"
4059   {
4060     operands[6] = copy_rtx (operands[4]);
4061     operands[7] = operands[5];
4062   }
4063 )
4064
4065 ;; Predicated string matching in which only the flags result is interesting.
4066 (define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
4067   [(set (reg:CC_NZC CC_REGNUM)
4068         (unspec:CC_NZC
4069           [(match_operand:VNx16BI 1 "register_operand" "Upl")
4070            (match_operand 4)
4071            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
4072            (unspec:<VPRED>
4073              [(match_operand 6)
4074               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
4075               (unspec:<VPRED>
4076                 [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
4077                  (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
4078                 SVE2_MATCH)]
4079              UNSPEC_PRED_Z)]
4080           UNSPEC_PTEST))
4081    (clobber (match_scratch:<VPRED> 0 "=Upa"))]
4082   "TARGET_SVE2
4083    && TARGET_NON_STREAMING
4084    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
4085   "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
4086   "&& !rtx_equal_p (operands[4], operands[6])"
4087   {
4088     operands[6] = copy_rtx (operands[4]);
4089     operands[7] = operands[5];
4090   }
4091 )
4092
4093 ;; -------------------------------------------------------------------------
4094 ;; ---- Table lookup
4095 ;; -------------------------------------------------------------------------
4096 ;; Includes:
4097 ;; - LUTI2
4098 ;; - LUTI4
4099 ;; -------------------------------------------------------------------------
4100
4101 (define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
4102   [(set (match_operand:SVE_FULL_BH 0 "register_operand" "=w")
4103         (unspec:SVE_FULL_BH
4104          [(match_operand:SVE_FULL_BH 1 "register_operand" "w")
4105           (match_operand:VNx16QI 2 "register_operand" "w")
4106           (match_operand:DI 3 "const_int_operand")
4107           (const_int LUTI_BITS)]
4108          UNSPEC_SVE_LUTI))]
4109   "TARGET_LUT && TARGET_SVE2_OR_SME2"
4110   "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
4111 )
4112
4113 (define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
4114   [(set (match_operand:<VSINGLE> 0 "register_operand" "=w")
4115         (unspec:<VSINGLE>
4116          [(match_operand:SVE_FULL_Hx2 1 "register_operand" "w")
4117           (match_operand:VNx16QI 2 "register_operand" "w")
4118           (match_operand:DI 3 "const_int_operand")
4119           (const_int LUTI_BITS)]
4120           UNSPEC_SVE_LUTI))]
4121   "TARGET_LUT && TARGET_SVE2_OR_SME2"
4122   "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]"
4123 )
4124
4125 ;; =========================================================================
4126 ;; == Cryptographic extensions
4127 ;; =========================================================================
4128
4129 ;; -------------------------------------------------------------------------
4130 ;; ---- Optional AES extensions
4131 ;; -------------------------------------------------------------------------
4132 ;; Includes:
4133 ;; - AESD
4134 ;; - AESE
4135 ;; - AESIMC
4136 ;; - AESMC
4137 ;; -------------------------------------------------------------------------
4138
4139 ;; AESD and AESE.
4140 (define_insn "aarch64_sve2_aes<aes_op>"
4141   [(set (match_operand:VNx16QI 0 "register_operand" "=w")
4142         (unspec:VNx16QI
4143           [(xor:VNx16QI
4144              (match_operand:VNx16QI 1 "register_operand" "%0")
4145              (match_operand:VNx16QI 2 "register_operand" "w"))]
4146           CRYPTO_AES))]
4147   "TARGET_SVE2_AES"
4148   "aes<aes_op>\t%0.b, %0.b, %2.b"
4149   [(set_attr "type" "crypto_aese")]
4150 )
4151
4152 ;; AESMC and AESIMC.  These instructions do not take MOVPRFX.
4153 (define_insn "aarch64_sve2_aes<aesmc_op>"
4154   [(set (match_operand:VNx16QI 0 "register_operand" "=w")
4155         (unspec:VNx16QI
4156           [(match_operand:VNx16QI 1 "register_operand" "0")]
4157           CRYPTO_AESMC))]
4158   "TARGET_SVE2_AES"
4159   "aes<aesmc_op>\t%0.b, %0.b"
4160   [(set_attr "type" "crypto_aesmc")]
4161 )
4162
4163 ;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
4164 ;; to keep the two together and enforce the register dependency without
4165 ;; scheduling or register allocation messing up the order or introducing
4166 ;; moves inbetween.  Mash the two together during combine.
4167
4168 (define_insn "*aarch64_sve2_aese_fused"
4169   [(set (match_operand:VNx16QI 0 "register_operand" "=w")
4170         (unspec:VNx16QI
4171           [(unspec:VNx16QI
4172              [(xor:VNx16QI
4173                 (match_operand:VNx16QI 1 "register_operand" "%0")
4174                 (match_operand:VNx16QI 2 "register_operand" "w"))]
4175              UNSPEC_AESE)]
4176           UNSPEC_AESMC))]
4177   "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
4178   "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
4179   [(set_attr "type" "crypto_aese")
4180    (set_attr "length" "8")]
4181 )
4182
4183 (define_insn "*aarch64_sve2_aesd_fused"
4184   [(set (match_operand:VNx16QI 0 "register_operand" "=w")
4185         (unspec:VNx16QI
4186           [(unspec:VNx16QI
4187              [(xor:VNx16QI
4188                 (match_operand:VNx16QI 1 "register_operand" "%0")
4189                 (match_operand:VNx16QI 2 "register_operand" "w"))]
4190              UNSPEC_AESD)]
4191           UNSPEC_AESIMC))]
4192   "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
4193   "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
4194   [(set_attr "type" "crypto_aese")
4195    (set_attr "length" "8")]
4196 )
4197
4198 ;; -------------------------------------------------------------------------
4199 ;; ---- Optional SHA-3 extensions
4200 ;; -------------------------------------------------------------------------
4201 ;; Includes:
4202 ;; - RAX1
4203 ;; -------------------------------------------------------------------------
4204
4205 (define_insn "aarch64_sve2_rax1"
4206   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4207         (xor:VNx2DI
4208           (rotate:VNx2DI
4209             (match_operand:VNx2DI 2 "register_operand" "w")
4210             (const_int 1))
4211           (match_operand:VNx2DI 1 "register_operand" "w")))]
4212   "TARGET_SVE2_SHA3"
4213   "rax1\t%0.d, %1.d, %2.d"
4214   [(set_attr "type" "crypto_sha3")]
4215 )
4216
4217 ;; -------------------------------------------------------------------------
4218 ;; ---- Optional SM4 extensions
4219 ;; -------------------------------------------------------------------------
4220 ;; Includes:
4221 ;; - SM4E
4222 ;; - SM4EKEY
4223 ;; -------------------------------------------------------------------------
4224
4225 ;; These instructions do not take MOVPRFX.
4226 (define_insn "aarch64_sve2_sm4e"
4227   [(set (match_operand:VNx4SI 0 "register_operand" "=w")
4228         (unspec:VNx4SI
4229           [(match_operand:VNx4SI 1 "register_operand" "0")
4230            (match_operand:VNx4SI 2 "register_operand" "w")]
4231           UNSPEC_SM4E))]
4232   "TARGET_SVE2_SM4"
4233   "sm4e\t%0.s, %0.s, %2.s"
4234   [(set_attr "type" "crypto_sm4")]
4235 )
4236
4237 (define_insn "aarch64_sve2_sm4ekey"
4238   [(set (match_operand:VNx4SI 0 "register_operand" "=w")
4239         (unspec:VNx4SI
4240           [(match_operand:VNx4SI 1 "register_operand" "w")
4241            (match_operand:VNx4SI 2 "register_operand" "w")]
4242           UNSPEC_SM4EKEY))]
4243   "TARGET_SVE2_SM4"
4244   "sm4ekey\t%0.s, %1.s, %2.s"
4245   [(set_attr "type" "crypto_sm4")]
4246 )