gcc/config/aarch64/aarch64-sve.md

   1 ;; Machine description for AArch64 SVE.
   2 ;; Copyright (C) 2009-2025 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; The file is organised into the following sections (search for the full
  22 ;; line):
  23 ;;
  24 ;; == General notes
  25 ;; ---- Note on the handling of big-endian SVE
  26 ;; ---- Description of UNSPEC_PTEST
  27 ;; ---- Description of UNSPEC_PRED_Z
  28 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
  29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
  30 ;; ---- Note on FFR handling
  31 ;;
  32 ;; == Moves
  33 ;; ---- Moves of single vectors
  34 ;; ---- Moves of multiple vectors
  35 ;; ---- Moves of predicates
  36 ;; ---- Moves of multiple predicates
  37 ;; ---- Moves relating to the FFR
  38 ;;
  39 ;; == Loads
  40 ;; ---- Normal contiguous loads
  41 ;; ---- Extending contiguous loads
  42 ;; ---- First-faulting contiguous loads
  43 ;; ---- First-faulting extending contiguous loads
  44 ;; ---- Non-temporal contiguous loads
  45 ;; ---- Normal gather loads
  46 ;; ---- Extending gather loads
  47 ;; ---- First-faulting gather loads
  48 ;; ---- First-faulting extending gather loads
  49 ;;
  50 ;; == Prefetches
  51 ;; ---- Contiguous prefetches
  52 ;; ---- Gather prefetches
  53 ;;
  54 ;; == Stores
  55 ;; ---- Normal contiguous stores
  56 ;; ---- Truncating contiguous stores
  57 ;; ---- Non-temporal contiguous stores
  58 ;; ---- Normal scatter stores
  59 ;; ---- Truncating scatter stores
  60 ;;
  61 ;; == Vector creation
  62 ;; ---- [INT,FP] Duplicate element
  63 ;; ---- [INT,FP] Initialize from individual elements
  64 ;; ---- [INT] Linear series
  65 ;; ---- [PRED] Duplicate element
  66 ;;
  67 ;; == Vector decomposition
  68 ;; ---- [INT,FP] Extract index
  69 ;; ---- [INT,FP] Extract active element
  70 ;; ---- [PRED] Extract index
  71 ;;
  72 ;; == Unary arithmetic
  73 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
  74 ;; ---- [INT] General unary arithmetic corresponding to unspecs
  75 ;; ---- [INT] Sign and zero extension
  76 ;; ---- [INT] Truncation
  77 ;; ---- [INT] Logical inverse
  78 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
  79 ;; ---- [FP] General unary arithmetic corresponding to unspecs
  80 ;; ---- [FP] Square root
  81 ;; ---- [FP] Reciprocal square root
  82 ;; ---- [PRED] Inverse
  83
  84 ;; == Binary arithmetic
  85 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
  86 ;; ---- [INT] Addition
  87 ;; ---- [INT] Subtraction
  88 ;; ---- [INT] Take address
  89 ;; ---- [INT] Absolute difference
  90 ;; ---- [INT] Saturating addition and subtraction
  91 ;; ---- [INT] Highpart multiplication
  92 ;; ---- [INT] Division
  93 ;; ---- [INT] Binary logical operations
  94 ;; ---- [INT] Binary logical operations (inverted second input)
  95 ;; ---- [INT] Shifts (rounding towards -Inf)
  96 ;; ---- [INT] Shifts (rounding towards 0)
  97 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
  98 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
  99 ;; ---- [FP] General binary arithmetic corresponding to unspecs
 100 ;; ---- [FP] Addition
 101 ;; ---- [FP] Complex addition
 102 ;; ---- [FP] Subtraction
 103 ;; ---- [FP] Absolute difference
 104 ;; ---- [FP] Multiplication
 105 ;; ---- [FP] Division
 106 ;; ---- [FP] Binary logical operations
 107 ;; ---- [FP] Sign copying
 108 ;; ---- [FP] Maximum and minimum
 109 ;; ---- [PRED] Binary logical operations
 110 ;; ---- [PRED] Binary logical operations (inverted second input)
 111 ;; ---- [PRED] Binary logical operations (inverted result)
 112 ;;
 113 ;; == Ternary arithmetic
 114 ;; ---- [INT] MLA and MAD
 115 ;; ---- [INT] MLS and MSB
 116 ;; ---- [INT] Dot product
 117 ;; ---- [INT] Sum of absolute differences
 118 ;; ---- [INT] Matrix multiply-accumulate
 119 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
 120 ;; ---- [FP] Complex multiply-add
 121 ;; ---- [FP] Trigonometric multiply-add
 122 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
 123 ;; ---- [FP] Matrix multiply-accumulate
 124 ;;
 125 ;; == Comparisons and selects
 126 ;; ---- [INT,FP] Select based on predicates
 127 ;; ---- [INT] Comparisons
 128 ;; ---- [INT] While tests
 129 ;; ---- [FP] Direct comparisons
 130 ;; ---- [FP] Absolute comparisons
 131 ;; ---- [PRED] Select
 132 ;; ---- [PRED] Test bits
 133 ;;
 134 ;; == Reductions
 135 ;; ---- [INT,FP] Conditional reductions
 136 ;; ---- [INT] Tree reductions
 137 ;; ---- [FP] Tree reductions
 138 ;; ---- [FP] Left-to-right reductions
 139 ;;
 140 ;; == Permutes
 141 ;; ---- [INT,FP] General permutes
 142 ;; ---- [INT,FP] Special-purpose unary permutes
 143 ;; ---- [INT,FP] Special-purpose binary permutes
 144 ;; ---- [PRED] Special-purpose unary permutes
 145 ;; ---- [PRED] Special-purpose binary permutes
 146 ;;
 147 ;; == Conversions
 148 ;; ---- [INT<-INT] Packs
 149 ;; ---- [INT<-INT] Unpacks
 150 ;; ---- [INT<-FP] Conversions
 151 ;; ---- [INT<-FP] Packs
 152 ;; ---- [INT<-FP] Unpacks
 153 ;; ---- [FP<-INT] Conversions
 154 ;; ---- [FP<-INT] Packs
 155 ;; ---- [FP<-INT] Unpacks
 156 ;; ---- [FP<-FP] Packs
 157 ;; ---- [FP<-FP] Packs (bfloat16)
 158 ;; ---- [FP<-FP] Unpacks
 159 ;; ---- [PRED<-PRED] Packs
 160 ;; ---- [PRED<-PRED] Unpacks
 161 ;;
 162 ;; == Vector partitioning
 163 ;; ---- [PRED] Unary partitioning
 164 ;; ---- [PRED] Binary partitioning
 165 ;; ---- [PRED] Scalarization
 166 ;;
 167 ;; == Counting elements
 168 ;; ---- [INT] Count elements in a pattern (scalar)
 169 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
 170 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
 171 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
 172 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
 173 ;; ---- [INT] Count elements in a predicate (scalar)
 174 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
 175 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
 176 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
 177 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
 178
 179 ;; =========================================================================
 180 ;; == General notes
 181 ;; =========================================================================
 182 ;;
 183 ;; -------------------------------------------------------------------------
 184 ;; ---- Note on the handling of big-endian SVE
 185 ;; -------------------------------------------------------------------------
 186 ;;
 187 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
 188 ;; same way as movdi or movti would: the first byte of memory goes
 189 ;; into the most significant byte of the register and the last byte
 190 ;; of memory goes into the least significant byte of the register.
 191 ;; This is the most natural ordering for Advanced SIMD and matches
 192 ;; the ABI layout for 64-bit and 128-bit vector types.
 193 ;;
 194 ;; As a result, the order of bytes within the register is what GCC
 195 ;; expects for a big-endian target, and subreg offsets therefore work
 196 ;; as expected, with the first element in memory having subreg offset 0
 197 ;; and the last element in memory having the subreg offset associated
 198 ;; with a big-endian lowpart.  However, this ordering also means that
 199 ;; GCC's lane numbering does not match the architecture's numbering:
 200 ;; GCC always treats the element at the lowest address in memory
 201 ;; (subreg offset 0) as element 0, while the architecture treats
 202 ;; the least significant end of the register as element 0.
 203 ;;
 204 ;; The situation for SVE is different.  We want the layout of the
 205 ;; SVE register to be same for mov<mode> as it is for maskload<mode>:
 206 ;; logically, a mov<mode> load must be indistinguishable from a
 207 ;; maskload<mode> whose mask is all true.  We therefore need the
 208 ;; register layout to match LD1 rather than LDR.  The ABI layout of
 209 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
 210 ;;
 211 ;; As a result, the architecture lane numbering matches GCC's lane
 212 ;; numbering, with element 0 always being the first in memory.
 213 ;; However:
 214 ;;
 215 ;; - Applying a subreg offset to a register does not give the element
 216 ;;   that GCC expects: the first element in memory has the subreg offset
 217 ;;   associated with a big-endian lowpart while the last element in memory
 218 ;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
 219 ;;
 220 ;; - We cannot use LDR and STR for spill slots that might be accessed
 221 ;;   via subregs, since although the elements have the order GCC expects,
 222 ;;   the order of the bytes within the elements is different.  We instead
 223 ;;   access spill slots via LD1 and ST1, using secondary reloads to
 224 ;;   reserve a predicate register.
 225 ;;
 226 ;; -------------------------------------------------------------------------
 227 ;; ---- Description of UNSPEC_PTEST
 228 ;; -------------------------------------------------------------------------
 229 ;;
 230 ;; SVE provides a PTEST instruction for testing the active lanes of a
 231 ;; predicate and setting the flags based on the result.  The associated
 232 ;; condition code tests are:
 233 ;;
 234 ;; - any   (= ne): at least one active bit is set
 235 ;; - none  (= eq): all active bits are clear (*)
 236 ;; - first (= mi): the first active bit is set
 237 ;; - nfrst (= pl): the first active bit is clear (*)
 238 ;; - last  (= cc): the last active bit is set
 239 ;; - nlast (= cs): the last active bit is clear (*)
 240 ;;
 241 ;; where the conditions marked (*) are also true when there are no active
 242 ;; lanes (i.e. when the governing predicate is a PFALSE).  The flags results
 243 ;; of a PTEST use the condition code mode CC_NZC.
 244 ;;
 245 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
 246 ;; This means that for other predicate modes, we need a governing predicate
 247 ;; in which all bits are defined.
 248 ;;
 249 ;; For example, most predicated .H operations ignore the odd bits of the
 250 ;; governing predicate, so that an active lane is represented by the
 251 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
 252 ;; any value.  To test a .H predicate, we instead need "10" and "00"
 253 ;; respectively, so that the condition only tests the even bits of the
 254 ;; predicate.
 255 ;;
 256 ;; Several instructions set the flags as a side-effect, in the same way
 257 ;; that a separate PTEST would.  It's important for code quality that we
 258 ;; use these flags results as often as possible, particularly in the case
 259 ;; of WHILE* and RDFFR.
 260 ;;
 261 ;; Also, some of the instructions that set the flags are unpredicated
 262 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
 263 ;; they were predicated on a PTRUE of that size.  For example, a .S
 264 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
 265 ;; would.
 266 ;;
 267 ;; We therefore need to represent PTEST operations in a way that
 268 ;; makes it easy to combine them with both predicated and unpredicated
 269 ;; operations, while using a VNx16BI governing predicate for all
 270 ;; predicate modes.  We do this using:
 271 ;;
 272 ;;   (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
 273 ;;
 274 ;; where:
 275 ;;
 276 ;; - GP is the real VNx16BI governing predicate
 277 ;;
 278 ;; - CAST_GP is GP cast to the mode of OP.  All bits dropped by casting
 279 ;;   GP to CAST_GP are guaranteed to be clear in GP.
 280 ;;
 281 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
 282 ;;   SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
 283 ;;   SVE_MAYBE_NOT_PTRUE otherwise.
 284 ;;
 285 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
 286 ;;
 287 ;; -------------------------------------------------------------------------
 288 ;; ---- Description of UNSPEC_PRED_Z
 289 ;; -------------------------------------------------------------------------
 290 ;;
 291 ;; SVE integer comparisons are predicated and return zero for inactive
 292 ;; lanes.  Sometimes we use them with predicates that are all-true and
 293 ;; sometimes we use them with general predicates.
 294 ;;
 295 ;; The integer comparisons also set the flags and so build-in the effect
 296 ;; of a PTEST.  We therefore want to be able to combine integer comparison
 297 ;; patterns with PTESTs of the result.  One difficulty with doing this is
 298 ;; that (as noted above) the PTEST is always a .B operation and so can place
 299 ;; stronger requirements on the governing predicate than the comparison does.
 300 ;;
 301 ;; For example, when applying a separate PTEST to the result of a full-vector
 302 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
 303 ;; .B PTRUE.  In constrast, the comparison might be predicated on either
 304 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
 305 ;; bits don't matter for .H operations.
 306 ;;
 307 ;; We therefore can't rely on a full-vector comparison using the same
 308 ;; predicate register as a following PTEST.  We instead need to remember
 309 ;; whether a comparison is known to be a full-vector comparison and use
 310 ;; this information in addition to a check for equal predicate registers.
 311 ;; At the same time, it's useful to have a common representation for all
 312 ;; integer comparisons, so that they can be handled by a single set of
 313 ;; patterns.
 314 ;;
 315 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
 316 ;;
 317 ;;   (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
 318 ;;
 319 ;; where:
 320 ;;
 321 ;; - GP is the governing predicate, of mode <M:VPRED>
 322 ;;
 323 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
 324 ;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
 325 ;;   otherwise
 326 ;;
 327 ;; - CODE is the comparison code
 328 ;;
 329 ;; - OP0 and OP1 are the values being compared, of mode M
 330 ;;
 331 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
 332 ;;
 333 ;; -------------------------------------------------------------------------
 334 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
 335 ;; -------------------------------------------------------------------------
 336 ;;
 337 ;; Many SVE integer operations are predicated.  We can generate them
 338 ;; from four sources:
 339 ;;
 340 ;; (1) Using normal unpredicated optabs.  In this case we need to create
 341 ;;     an all-true predicate register to act as the governing predicate
 342 ;;     for the SVE instruction.  There are no inactive lanes, and thus
 343 ;;     the values of inactive lanes don't matter.
 344 ;;
 345 ;; (2) Using _x ACLE functions.  In this case the function provides a
 346 ;;     specific predicate and some lanes might be inactive.  However,
 347 ;;     as for (1), the values of the inactive lanes don't matter.
 348 ;;     We can make extra lanes active without changing the behavior
 349 ;;     (although for code-quality reasons we should avoid doing so
 350 ;;     needlessly).
 351 ;;
 352 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
 353 ;;     These optabs have a predicate operand that specifies which lanes are
 354 ;;     active and another operand that provides the values of inactive lanes.
 355 ;;
 356 ;; (4) Using _m and _z ACLE functions.  These functions map to the same
 357 ;;     patterns as (3), with the _z functions setting inactive lanes to zero
 358 ;;     and the _m functions setting the inactive lanes to one of the function
 359 ;;     arguments.
 360 ;;
 361 ;; For (1) and (2) we need a way of attaching the predicate to a normal
 362 ;; unpredicated integer operation.  We do this using:
 363 ;;
 364 ;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
 365 ;;
 366 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
 367 ;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
 368 ;; it always is for (1), but might not be for (2).
 369 ;;
 370 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
 371 ;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
 372 ;; but as noted above, we should only do so if there's a specific benefit.
 373 ;;
 374 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
 375 ;;
 376 ;; For (3) and (4) we can simply use the SVE port's normal representation
 377 ;; of a predicate-based select:
 378 ;;
 379 ;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
 380 ;;
 381 ;; where INACTIVE specifies the values of inactive lanes.
 382 ;;
 383 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
 384 ;; than inserting the integer operation directly.  This is mostly useful
 385 ;; if we want the combine pass to merge an integer operation with an explicit
 386 ;; vcond_mask (in other words, with a following SEL instruction).  However,
 387 ;; it's generally better to merge such operations at the gimple level
 388 ;; using (3).
 389 ;;
 390 ;; -------------------------------------------------------------------------
 391 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
 392 ;; -------------------------------------------------------------------------
 393 ;;
 394 ;; Most SVE floating-point operations are predicated.  We can generate
 395 ;; them from four sources:
 396 ;;
 397 ;; (1) Using normal unpredicated optabs.  In this case we need to create
 398 ;;     an all-true predicate register to act as the governing predicate
 399 ;;     for the SVE instruction.  There are no inactive lanes, and thus
 400 ;;     the values of inactive lanes don't matter.
 401 ;;
 402 ;; (2) Using _x ACLE functions.  In this case the function provides a
 403 ;;     specific predicate and some lanes might be inactive.  However,
 404 ;;     as for (1), the values of the inactive lanes don't matter.
 405 ;;
 406 ;;     The instruction must have the same exception behavior as the
 407 ;;     function call unless things like command-line flags specifically
 408 ;;     allow otherwise.  For example, with -ffast-math, it is OK to
 409 ;;     raise exceptions for inactive lanes, but normally it isn't.
 410 ;;
 411 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
 412 ;;     These optabs have a predicate operand that specifies which lanes are
 413 ;;     active and another operand that provides the values of inactive lanes.
 414 ;;
 415 ;; (4) Using _m and _z ACLE functions.  These functions map to the same
 416 ;;     patterns as (3), with the _z functions setting inactive lanes to zero
 417 ;;     and the _m functions setting the inactive lanes to one of the function
 418 ;;     arguments.
 419 ;;
 420 ;; So:
 421 ;;
 422 ;; - In (1), the predicate is known to be all true and the pattern can use
 423 ;;   unpredicated operations where available.
 424 ;;
 425 ;; - In (2), the predicate might or might not be all true.  The pattern can
 426 ;;   use unpredicated instructions if the predicate is all-true or if things
 427 ;;   like command-line flags allow exceptions for inactive lanes.
 428 ;;
 429 ;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
 430 ;;   might be inactive and inactive lanes of the result must have specific
 431 ;;   values.  There is no scope for using unpredicated instructions (and no
 432 ;;   reason to want to), so the question about command-line flags doesn't
 433 ;;   arise.
 434 ;;
 435 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
 436 ;; in combination with a separate predicate operand, e.g.
 437 ;;
 438 ;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
 439 ;;            (sqrt:SVE_FULL_F 2 "register_operand" "w")]
 440 ;;           ....)
 441 ;;
 442 ;; because (sqrt ...) can raise an exception for any lane, including
 443 ;; inactive ones.  We therefore need to use an unspec instead.
 444 ;;
 445 ;; Also, (2) requires some way of distinguishing the case in which the
 446 ;; predicate might have inactive lanes and cannot be changed from the
 447 ;; case in which the predicate has no inactive lanes or can be changed.
 448 ;; This information is also useful when matching combined FP patterns
 449 ;; in which the predicates might not be equal.
 450 ;;
 451 ;; We therefore model FP operations as an unspec of the form:
 452 ;;
 453 ;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
 454 ;;
 455 ;; where:
 456 ;;
 457 ;; - PRED is the governing predicate.
 458 ;;
 459 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
 460 ;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
 461 ;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
 462 ;;
 463 ;; - OP0 OP1 ... are the normal input operands to the operation.
 464 ;;
 465 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
 466 ;;
 467 ;; For (3) and (4), we combine these operations with an UNSPEC_SEL
 468 ;; that selects between the result of the FP operation and the "else"
 469 ;; value.  (This else value is a merge input for _m ACLE functions
 470 ;; and zero for _z ACLE functions.)  The outer pattern then has the form:
 471 ;;
 472 ;;   (unspec [pred fp_operation else_value] UNSPEC_SEL)
 473 ;;
 474 ;; This means that the patterns for (3) and (4) have two predicates:
 475 ;; one for the FP operation itself and one for the UNSPEC_SEL.
 476 ;; This pattern is equivalent to the result of combining an instance
 477 ;; of (1) or (2) with a separate vcond instruction, so these patterns
 478 ;; are useful as combine targets too.
 479 ;;
 480 ;; However, in the combine case, the instructions that we want to
 481 ;; combine might use different predicates.  Then:
 482 ;;
 483 ;; - Some of the active lanes of the FP operation might be discarded
 484 ;;   by the UNSPEC_SEL.  It's OK to drop the FP operation on those lanes,
 485 ;;   even for SVE_STRICT_GP, since the operations on those lanes are
 486 ;;   effectively dead code.
 487 ;;
 488 ;; - Some of the inactive lanes of the FP operation might be selected
 489 ;;   by the UNSPEC_SEL, giving unspecified values for those lanes.
 490 ;;   SVE_RELAXED_GP lets us extend the FP operation to cover these
 491 ;;   extra lanes, but SVE_STRICT_GP does not.
 492 ;;
 493 ;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
 494 ;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
 495 ;; This typically leads to patterns like:
 496 ;;
 497 ;;    (unspec [(match_operand 1 "register_operand" "Upl")
 498 ;;             (unspec [(match_operand N)
 499 ;;                      (const_int SVE_RELAXED_GP)
 500 ;;                      ...]
 501 ;;                     UNSPEC_COND_<MNEMONIC>)
 502 ;;             ...])
 503 ;;
 504 ;; where operand N is allowed to be anything.  These instructions then
 505 ;; have rewrite rules to replace operand N with operand 1, which gives the
 506 ;; instructions a canonical form and means that the original operand N is
 507 ;; not kept live unnecessarily.
 508 ;;
 509 ;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
 510 ;; a subset of the FP operation predicate.  This case isn't interesting
 511 ;; for FP operations that have an all-true predicate, since such operations
 512 ;; use SVE_RELAXED_GP instead.  And it is not possible for instruction
 513 ;; conditions to track the subset relationship for arbitrary registers.
 514 ;; So in practice, the only useful case for SVE_STRICT_GP is the one
 515 ;; in which the predicates match:
 516 ;;
 517 ;;    (unspec [(match_operand 1 "register_operand" "Upl")
 518 ;;             (unspec [(match_dup 1)
 519 ;;                      (const_int SVE_STRICT_GP)
 520 ;;                      ...]
 521 ;;                     UNSPEC_COND_<MNEMONIC>)
 522 ;;             ...])
 523 ;;
 524 ;; This pattern would also be correct for SVE_RELAXED_GP, but it would
 525 ;; be redundant with the one above.  However, if the combine pattern
 526 ;; has multiple FP operations, using a match_operand allows combinations
 527 ;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
 528 ;; that the predicates are the same:
 529 ;;
 530 ;;    (unspec [(match_operand 1 "register_operand" "Upl")
 531 ;;             (...
 532 ;;                (unspec [(match_dup 1)
 533 ;;                         (match_operand:SI N "aarch64_sve_gp_strictness")
 534 ;;                         ...]
 535 ;;                        UNSPEC_COND_<MNEMONIC1>)
 536 ;;                (unspec [(match_dup 1)
 537 ;;                         (match_operand:SI M "aarch64_sve_gp_strictness")
 538 ;;                         ...]
 539 ;;                        UNSPEC_COND_<MNEMONIC2>) ...)
 540 ;;             ...])
 541 ;;
 542 ;; The fully-relaxed version of this pattern is:
 543 ;;
 544 ;;    (unspec [(match_operand 1 "register_operand" "Upl")
 545 ;;             (...
 546 ;;                (unspec [(match_operand:SI N)
 547 ;;                         (const_int SVE_RELAXED_GP)
 548 ;;                         ...]
 549 ;;                        UNSPEC_COND_<MNEMONIC1>)
 550 ;;                (unspec [(match_operand:SI M)
 551 ;;                         (const_int SVE_RELAXED_GP)
 552 ;;                         ...]
 553 ;;                        UNSPEC_COND_<MNEMONIC2>) ...)
 554 ;;             ...])
 555 ;;
 556 ;; -------------------------------------------------------------------------
 557 ;; ---- Note on FFR handling
 558 ;; -------------------------------------------------------------------------
 559 ;;
 560 ;; Logically we want to divide FFR-related instructions into regions
 561 ;; that contain exactly one of:
 562 ;;
 563 ;; - a single write to the FFR
 564 ;; - any number of reads from the FFR (but only one read is likely)
 565 ;; - any number of LDFF1 and LDNF1 instructions
 566 ;;
 567 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
 568 ;; normal loads as far as possible.  This means that they should be
 569 ;; schedulable within a region in the same way that LD1 would be,
 570 ;; and they should be deleted as dead if the result is unused.  The loads
 571 ;; should therefore not write to the FFR, since that would both serialize
 572 ;; the loads with respect to each other and keep the loads live for any
 573 ;; later RDFFR.
 574 ;;
 575 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
 576 ;; the dependencies.  Writing to the FFRT starts a new "FFRT region",
 577 ;; while using the FFRT keeps the instruction within its region.
 578 ;; Specifically:
 579 ;;
 580 ;; - Writes start a new FFRT region as well as setting the FFR:
 581 ;;
 582 ;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
 583 ;;
 584 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
 585 ;;   loads stay within the same FFRT region:
 586 ;;
 587 ;;       L1: load data while using the FFRT
 588 ;;
 589 ;;   In addition, any FFRT region that includes a load also has at least one
 590 ;;   instance of:
 591 ;;
 592 ;;       L2: FFR = update(FFR, FFRT)  [type == no_insn]
 593 ;;
 594 ;;   to make it clear that the region both reads from and writes to the FFR.
 595 ;;
 596 ;; - Reads do the following:
 597 ;;
 598 ;;       R1: FFRT = FFR               [type == no_insn]
 599 ;;       R2: read from the FFRT
 600 ;;       R3: FFRT = update(FFRT)      [type == no_insn]
 601 ;;
 602 ;;   R1 and R3 both create new FFRT regions, so that previous LDFF1s and
 603 ;;   LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
 604 ;;   cannot move backwards across R3.
 605 ;;
 606 ;; This way, writes are only kept alive by later loads or reads,
 607 ;; and write/read pairs fold normally.  For two consecutive reads,
 608 ;; the first R3 is made dead by the second R1, which in turn becomes
 609 ;; redundant with the first R1.  We then have:
 610 ;;
 611 ;;     first R1: FFRT = FFR
 612 ;;     first read from the FFRT
 613 ;;     second read from the FFRT
 614 ;;     second R3: FFRT = update(FFRT)
 615 ;;
 616 ;; i.e. the two FFRT regions collapse into a single one with two
 617 ;; independent reads.
 618 ;;
 619 ;; The model still prevents some valid optimizations though.  For example,
 620 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
 621 ;; the L2 instructions.
 622
 623 ;; =========================================================================
 624 ;; == Moves
 625 ;; =========================================================================
 626
 627 ;; -------------------------------------------------------------------------
 628 ;; ---- Moves of single vectors
 629 ;; -------------------------------------------------------------------------
 630 ;; Includes:
 631 ;; - MOV  (including aliases)
 632 ;; - LD1B (contiguous form)
 633 ;; - LD1D (    "    "     )
 634 ;; - LD1H (    "    "     )
 635 ;; - LD1W (    "    "     )
 636 ;; - LDR
 637 ;; - ST1B (contiguous form)
 638 ;; - ST1D (    "    "     )
 639 ;; - ST1H (    "    "     )
 640 ;; - ST1W (    "    "     )
 641 ;; - STR
 642 ;; -------------------------------------------------------------------------
 643
 644 (define_expand "mov<mode>"
 645   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
 646         (match_operand:SVE_ALL 1 "general_operand"))]
 647   "TARGET_SVE"
 648   {
 649     /* Use the predicated load and store patterns where possible.
 650        This is required for big-endian targets (see the comment at the
 651        head of the file) and increases the addressing choices for
 652        little-endian.  */
 653     if ((MEM_P (operands[0]) || MEM_P (operands[1]))
 654         && can_create_pseudo_p ())
 655       {
 656         aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
 657         DONE;
 658       }
 659
 660     if (CONSTANT_P (operands[1]))
 661       {
 662         aarch64_expand_mov_immediate (operands[0], operands[1]);
 663         DONE;
 664       }
 665
 666     /* Optimize subregs on big-endian targets: we can use REV[BHW]
 667        instead of going through memory.  */
 668     if (BYTES_BIG_ENDIAN
 669         && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
 670       DONE;
 671   }
 672 )
 673
 674 (define_expand "movmisalign<mode>"
 675   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
 676         (match_operand:SVE_ALL 1 "general_operand"))]
 677   "TARGET_SVE"
 678   {
 679     /* Equivalent to a normal move for our purpooses.  */
 680     emit_move_insn (operands[0], operands[1]);
 681     DONE;
 682   }
 683 )
 684
 685 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
 686 ;; little-endian ordering is acceptable.  Only allow memory operations during
 687 ;; and after RA; before RA we want the predicated load and store patterns to
 688 ;; be used instead.
 689 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
 690   [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand")
 691         (match_operand:SVE_FULL 1 "aarch64_sve_general_operand"))]
 692   "TARGET_SVE
 693    && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
 694    && ((lra_in_progress || reload_completed)
 695        || (register_operand (operands[0], <MODE>mode)
 696            && nonmemory_operand (operands[1], <MODE>mode)))"
 697   {@ [ cons: =0 , 1    ]
 698      [ w        , Utr  ] ldr\t%0, %1
 699      [ Utr      , w    ] str\t%1, %0
 700      [ w        , w    ] mov\t%0.d, %1.d
 701      [ w        , Dn   ] << aarch64_output_sve_mov_immediate (operands[1]);
 702   }
 703 )
 704
 705 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
 706 ;; or vectors for which little-endian ordering isn't acceptable.  Memory
 707 ;; accesses require secondary reloads.
 708 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
 709   [(set (match_operand:SVE_ALL 0 "register_operand")
 710         (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand"))]
 711   "TARGET_SVE
 712    && <MODE>mode != VNx16QImode
 713    && (BYTES_BIG_ENDIAN
 714        || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
 715   {@ [ cons: =0 , 1   ]
 716      [ w        , w   ] mov\t%0.d, %1.d
 717      [ w        , Dn  ] << aarch64_output_sve_mov_immediate (operands[1]);
 718   }
 719 )
 720
 721 ;; Handle memory reloads for modes that can't use LDR and STR.  We use
 722 ;; byte PTRUE for all modes to try to encourage reuse.  This pattern
 723 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
 724 (define_expand "aarch64_sve_reload_mem"
 725   [(parallel
 726      [(set (match_operand 0)
 727            (match_operand 1))
 728       (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
 729   "TARGET_SVE"
 730   {
 731     /* Create a PTRUE.  */
 732     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
 733
 734     /* Refer to the PTRUE in the appropriate mode for this move.  */
 735     machine_mode mode = GET_MODE (operands[0]);
 736     rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
 737
 738     /* Emit a predicated load or store.  */
 739     aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
 740     DONE;
 741   }
 742 )
 743
 744 ;; A predicated move in which the predicate is known to be all-true.
 745 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
 746 ;; so changes to this pattern will need changes there as well.
 747 (define_insn_and_split "@aarch64_pred_mov<mode>"
 748   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
 749         (unspec:SVE_ALL
 750           [(match_operand:<VPRED> 1 "register_operand")
 751            (match_operand:SVE_ALL 2 "nonimmediate_operand")]
 752           UNSPEC_PRED_X))]
 753   "TARGET_SVE
 754    && (register_operand (operands[0], <MODE>mode)
 755        || register_operand (operands[2], <MODE>mode))"
 756   {@ [ cons: =0 , 1   , 2  ]
 757      [ w        , Upl , w  ] #
 758      [ w        , Upl , m  ] ld1<Vesize>\t%0.<Vctype>, %1/z, %2
 759      [ m        , Upl , w  ] st1<Vesize>\t%2.<Vctype>, %1, %0
 760   }
 761   "&& register_operand (operands[0], <MODE>mode)
 762    && register_operand (operands[2], <MODE>mode)"
 763   [(set (match_dup 0) (match_dup 2))]
 764 )
 765
 766 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
 767 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
 768 ;; for details.  We use a special predicate for operand 2 to reduce
 769 ;; the number of patterns.
 770 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
 771   [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
 772         (unspec:SVE_ALL
 773           [(match_operand:VNx16BI 1 "register_operand" "Upl")
 774            (match_operand 2 "aarch64_any_register_operand" "w")]
 775           UNSPEC_REV_SUBREG))]
 776   "TARGET_SVE && BYTES_BIG_ENDIAN"
 777   "#"
 778   "&& reload_completed"
 779   [(const_int 0)]
 780   {
 781     aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
 782     DONE;
 783   }
 784 )
 785
 786 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
 787 ;; This is equivalent to a subreg on little-endian targets but not for
 788 ;; big-endian; see the comment at the head of the file for details.
 789 (define_expand "@aarch64_sve_reinterpret<mode>"
 790   [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand")
 791         (unspec:SVE_ALL_STRUCT
 792           [(match_operand 1 "aarch64_any_register_operand")]
 793           UNSPEC_REINTERPRET))]
 794   "TARGET_SVE"
 795   {
 796     machine_mode src_mode = GET_MODE (operands[1]);
 797     if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
 798       {
 799         emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
 800         DONE;
 801       }
 802   }
 803 )
 804
 805 ;; A pattern for handling type punning on big-endian targets.  We use a
 806 ;; special predicate for operand 1 to reduce the number of patterns.
 807 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
 808   [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand" "=w")
 809         (unspec:SVE_ALL_STRUCT
 810           [(match_operand 1 "aarch64_any_register_operand" "w")]
 811           UNSPEC_REINTERPRET))]
 812   "TARGET_SVE"
 813   "#"
 814   "&& reload_completed"
 815   [(set (match_dup 0) (match_dup 1))]
 816   {
 817     operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
 818   }
 819 )
 820
 821 ;; -------------------------------------------------------------------------
 822 ;; ---- Moves of multiple vectors
 823 ;; -------------------------------------------------------------------------
 824 ;; All patterns in this section are synthetic and split to real
 825 ;; instructions after reload.
 826 ;; -------------------------------------------------------------------------
 827
 828 (define_expand "mov<mode>"
 829   [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
 830         (match_operand:SVE_STRUCT 1 "general_operand"))]
 831   "TARGET_SVE"
 832   {
 833     /* Big-endian loads and stores need to be done via LD1 and ST1;
 834        see the comment at the head of the file for details.  */
 835     if ((MEM_P (operands[0]) || MEM_P (operands[1]))
 836         && BYTES_BIG_ENDIAN)
 837       {
 838         gcc_assert (can_create_pseudo_p ());
 839         aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
 840         DONE;
 841       }
 842
 843     if (CONSTANT_P (operands[1]))
 844       {
 845         aarch64_expand_mov_immediate (operands[0], operands[1]);
 846         DONE;
 847       }
 848   }
 849 )
 850
 851 ;; Unpredicated structure moves (little-endian).
 852 (define_insn "*aarch64_sve_mov<mode>_le"
 853   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
 854         (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
 855   "TARGET_SVE && !BYTES_BIG_ENDIAN"
 856   "#"
 857   [(set_attr "length" "<insn_length>")]
 858 )
 859
 860 ;; Unpredicated structure moves (big-endian).  Memory accesses require
 861 ;; secondary reloads.
 862 (define_insn "*aarch64_sve_mov<mode>_be"
 863   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
 864         (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
 865   "TARGET_SVE && BYTES_BIG_ENDIAN"
 866   "#"
 867   [(set_attr "length" "<insn_length>")]
 868 )
 869
 870 ;; Split unpredicated structure moves into pieces.  This is the same
 871 ;; for both big-endian and little-endian code, although it only needs
 872 ;; to handle memory operands for little-endian code.
 873 (define_split
 874   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
 875         (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
 876   "TARGET_SVE && reload_completed"
 877   [(const_int 0)]
 878   {
 879     rtx dest = operands[0];
 880     rtx src = operands[1];
 881     if (REG_P (dest) && REG_P (src))
 882       aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
 883     else
 884       for (unsigned int i = 0; i < <vector_count>; ++i)
 885         {
 886           rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
 887                                              i * BYTES_PER_SVE_VECTOR);
 888           rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
 889                                             i * BYTES_PER_SVE_VECTOR);
 890           emit_insn (gen_rtx_SET (subdest, subsrc));
 891         }
 892     DONE;
 893   }
 894 )
 895
 896 ;; Predicated structure moves.  This works for both endiannesses but in
 897 ;; practice is only useful for big-endian.
 898 (define_insn_and_split "@aarch64_pred_mov<mode>"
 899   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
 900         (unspec:SVE_STRUCT
 901           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 902            (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
 903           UNSPEC_PRED_X))]
 904   "TARGET_SVE
 905    && (register_operand (operands[0], <MODE>mode)
 906        || register_operand (operands[2], <MODE>mode))"
 907   "#"
 908   "&& reload_completed"
 909   [(const_int 0)]
 910   {
 911     for (unsigned int i = 0; i < <vector_count>; ++i)
 912       {
 913         rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
 914                                            <MODE>mode,
 915                                            i * BYTES_PER_SVE_VECTOR);
 916         rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
 917                                           <MODE>mode,
 918                                           i * BYTES_PER_SVE_VECTOR);
 919         aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
 920       }
 921     DONE;
 922   }
 923   [(set_attr "length" "<insn_length>")]
 924 )
 925
 926 ;; -------------------------------------------------------------------------
 927 ;; ---- Moves of predicates
 928 ;; -------------------------------------------------------------------------
 929 ;; Includes:
 930 ;; - MOV
 931 ;; - LDR
 932 ;; - PFALSE
 933 ;; - PTRUE
 934 ;; - PTRUES
 935 ;; - STR
 936 ;; -------------------------------------------------------------------------
 937
 938 (define_expand "mov<mode>"
 939   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
 940         (match_operand:PRED_ALL 1 "general_operand"))]
 941   "TARGET_SVE"
 942   {
 943     if (GET_CODE (operands[0]) == MEM)
 944       operands[1] = force_reg (<MODE>mode, operands[1]);
 945
 946     if (CONSTANT_P (operands[1]))
 947       {
 948         aarch64_expand_mov_immediate (operands[0], operands[1]);
 949         DONE;
 950       }
 951   }
 952 )
 953
 954 (define_insn "*aarch64_sve_mov<mode>"
 955   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
 956         (match_operand:PRED_ALL 1 "aarch64_mov_operand"))]
 957   "TARGET_SVE
 958    && (register_operand (operands[0], <MODE>mode)
 959        || register_operand (operands[1], <MODE>mode))"
 960   {@ [ cons: =0 , 1    ]
 961      [ Upa      , Upa  ] mov\t%0.b, %1.b
 962      [ m        , Upa  ] str\t%1, %0
 963      [ Upa      , m    ] ldr\t%0, %1
 964      [ Upa      , Dn   ] << aarch64_output_sve_mov_immediate (operands[1]);
 965   }
 966 )
 967
 968 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
 969 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
 970   [(set (reg:CC_NZC CC_REGNUM)
 971         (unspec:CC_NZC
 972           [(match_operand 2)
 973            (match_operand 3)
 974            (const_int SVE_KNOWN_PTRUE)
 975            (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
 976              [(unspec:VNx16BI
 977                 [(match_operand:SI 4 "const_int_operand")
 978                  (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
 979                 UNSPEC_PTRUE)])]
 980           UNSPEC_PTEST))
 981    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 982         (match_dup 1))]
 983   "TARGET_SVE"
 984   {
 985     return aarch64_output_sve_ptrues (operands[1]);
 986   }
 987   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
 988   {
 989     operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
 990   }
 991 )
 992
 993 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
 994 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
 995   [(set (reg:CC_NZC CC_REGNUM)
 996         (unspec:CC_NZC
 997           [(match_operand 2)
 998            (match_operand 3)
 999            (const_int SVE_KNOWN_PTRUE)
1000            (subreg:PRED_HSD
1001              (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1002                [(unspec:VNx16BI
1003                   [(match_operand:SI 4 "const_int_operand")
1004                    (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1005                   UNSPEC_PTRUE)]) 0)]
1006           UNSPEC_PTEST))
1007    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1008         (match_dup 1))]
1009   "TARGET_SVE"
1010   {
1011     return aarch64_output_sve_ptrues (operands[1]);
1012   }
1013   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1014   {
1015     operands[2] = CONSTM1_RTX (VNx16BImode);
1016     operands[3] = CONSTM1_RTX (<MODE>mode);
1017   }
1018 )
1019
1020 ;; Match PTRUES Pn.B when only the flags result is useful (which is
1021 ;; a way of testing VL).
1022 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
1023   [(set (reg:CC_NZC CC_REGNUM)
1024         (unspec:CC_NZC
1025           [(match_operand 2)
1026            (match_operand 3)
1027            (const_int SVE_KNOWN_PTRUE)
1028            (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1029              [(unspec:VNx16BI
1030                 [(match_operand:SI 4 "const_int_operand")
1031                  (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
1032                 UNSPEC_PTRUE)])]
1033           UNSPEC_PTEST))
1034    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1035   "TARGET_SVE"
1036   {
1037     return aarch64_output_sve_ptrues (operands[1]);
1038   }
1039   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1040   {
1041     operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
1042   }
1043 )
1044
1045 ;; Match PTRUES Pn.[HWD] when only the flags result is useful (which is
1046 ;; a way of testing VL).
1047 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
1048   [(set (reg:CC_NZC CC_REGNUM)
1049         (unspec:CC_NZC
1050           [(match_operand 2)
1051            (match_operand 3)
1052            (const_int SVE_KNOWN_PTRUE)
1053            (subreg:PRED_HSD
1054              (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
1055                [(unspec:VNx16BI
1056                   [(match_operand:SI 4 "const_int_operand")
1057                    (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
1058                   UNSPEC_PTRUE)]) 0)]
1059           UNSPEC_PTEST))
1060    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1061   "TARGET_SVE"
1062   {
1063     return aarch64_output_sve_ptrues (operands[1]);
1064   }
1065   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
1066   {
1067     operands[2] = CONSTM1_RTX (VNx16BImode);
1068     operands[3] = CONSTM1_RTX (<MODE>mode);
1069   }
1070 )
1071
1072 ;; -------------------------------------------------------------------------
1073 ;; ---- Moves of multiple predicates
1074 ;; -------------------------------------------------------------------------
1075
1076 (define_insn_and_split "mov<mode>"
1077   [(set (match_operand:SVE_STRUCT_BI 0 "nonimmediate_operand")
1078         (match_operand:SVE_STRUCT_BI 1 "aarch64_mov_operand"))]
1079   "TARGET_SVE"
1080   {@ [ cons: =0 , 1   ]
1081      [ Upa      , Upa ] #
1082      [ Upa      , m   ] #
1083      [ m        , Upa ] #
1084   }
1085   "&& reload_completed"
1086   [(const_int 0)]
1087   {
1088     aarch64_split_move (operands[0], operands[1], VNx16BImode);
1089     DONE;
1090   }
1091 )
1092
1093 ;; -------------------------------------------------------------------------
1094 ;; ---- Moves relating to the FFR
1095 ;; -------------------------------------------------------------------------
1096 ;; RDFFR
1097 ;; RDFFRS
1098 ;; SETFFR
1099 ;; WRFFR
1100 ;; -------------------------------------------------------------------------
1101
1102 ;; [W1 in the block comment above about FFR handling]
1103 ;;
1104 ;; Write to the FFR and start a new FFRT scheduling region.
1105 (define_insn "aarch64_wrffr"
1106   [(set (reg:VNx16BI FFR_REGNUM)
1107         (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one"))
1108    (set (reg:VNx16BI FFRT_REGNUM)
1109         (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
1110   "TARGET_SVE && TARGET_NON_STREAMING"
1111   {@ [ cons: 0 ]
1112      [ Dm      ] setffr
1113      [ Upa     ] wrffr\t%0.b
1114   }
1115 )
1116
1117 ;; [L2 in the block comment above about FFR handling]
1118 ;;
1119 ;; Introduce a read from and write to the FFR in the current FFRT region,
1120 ;; so that the FFR value is live on entry to the region and so that the FFR
1121 ;; value visibly changes within the region.  This is used (possibly multiple
1122 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
1123 (define_insn "aarch64_update_ffr_for_load"
1124   [(set (reg:VNx16BI FFR_REGNUM)
1125         (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1126                          (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1127   "TARGET_SVE"
1128   ""
1129   [(set_attr "type" "no_insn")]
1130 )
1131
1132 ;; [R1 in the block comment above about FFR handling]
1133 ;;
1134 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1135 ;; can be read from there by the RDFFR instructions below.  This acts
1136 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1137 ;; creates a natural dependency with earlier writes.
1138 (define_insn "aarch64_copy_ffr_to_ffrt"
1139   [(set (reg:VNx16BI FFRT_REGNUM)
1140         (reg:VNx16BI FFR_REGNUM))]
1141   "TARGET_SVE"
1142   ""
1143   [(set_attr "type" "no_insn")]
1144 )
1145
1146 ;; [R2 in the block comment above about FFR handling]
1147 ;;
1148 ;; Read the FFR via the FFRT.
1149 (define_insn "aarch64_rdffr"
1150   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1151         (reg:VNx16BI FFRT_REGNUM))]
1152   "TARGET_SVE && TARGET_NON_STREAMING"
1153   "rdffr\t%0.b"
1154 )
1155
1156 ;; Likewise with zero predication.
1157 (define_insn "aarch64_rdffr_z"
1158   [(set (match_operand:VNx16BI 0 "register_operand")
1159         (and:VNx16BI
1160           (reg:VNx16BI FFRT_REGNUM)
1161           (match_operand:VNx16BI 1 "register_operand")))]
1162   "TARGET_SVE && TARGET_NON_STREAMING"
1163   {@ [ cons: =0, 1   ; attrs: pred_clobber ]
1164      [ &Upa    , Upa ; yes                 ] rdffr\t%0.b, %1/z
1165      [ ?Upa    , 0Upa; yes                 ] ^
1166      [ Upa     , Upa ; no                  ] ^
1167   }
1168 )
1169
1170 ;; Read the FFR to test for a fault, without using the predicate result.
1171 (define_insn "*aarch64_rdffr_z_ptest"
1172   [(set (reg:CC_NZC CC_REGNUM)
1173         (unspec:CC_NZC
1174           [(match_operand:VNx16BI 1 "register_operand")
1175            (match_dup 1)
1176            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1177            (and:VNx16BI
1178              (reg:VNx16BI FFRT_REGNUM)
1179              (match_dup 1))]
1180           UNSPEC_PTEST))
1181    (clobber (match_scratch:VNx16BI 0))]
1182   "TARGET_SVE && TARGET_NON_STREAMING"
1183   {@ [ cons: =0, 1   ; attrs: pred_clobber ]
1184      [ &Upa    , Upa ; yes                 ] rdffrs\t%0.b, %1/z
1185      [ ?Upa    , 0Upa; yes                 ] ^
1186      [ Upa     , Upa ; no                  ] ^
1187   }
1188 )
1189
1190 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1191 (define_insn "*aarch64_rdffr_ptest"
1192   [(set (reg:CC_NZC CC_REGNUM)
1193         (unspec:CC_NZC
1194           [(match_operand:VNx16BI 1 "register_operand")
1195            (match_dup 1)
1196            (const_int SVE_KNOWN_PTRUE)
1197            (reg:VNx16BI FFRT_REGNUM)]
1198           UNSPEC_PTEST))
1199    (clobber (match_scratch:VNx16BI 0))]
1200   "TARGET_SVE && TARGET_NON_STREAMING"
1201   {@ [ cons: =0, 1   ; attrs: pred_clobber ]
1202      [ &Upa    , Upa ; yes                 ] rdffrs\t%0.b, %1/z
1203      [ ?Upa    , 0Upa; yes                 ] ^
1204      [ Upa     , Upa ; no                  ] ^
1205   }
1206 )
1207
1208 ;; Read the FFR with zero predication and test the result.
1209 (define_insn "*aarch64_rdffr_z_cc"
1210   [(set (reg:CC_NZC CC_REGNUM)
1211         (unspec:CC_NZC
1212           [(match_operand:VNx16BI 1 "register_operand")
1213            (match_dup 1)
1214            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1215            (and:VNx16BI
1216              (reg:VNx16BI FFRT_REGNUM)
1217              (match_dup 1))]
1218           UNSPEC_PTEST))
1219    (set (match_operand:VNx16BI 0 "register_operand")
1220         (and:VNx16BI
1221           (reg:VNx16BI FFRT_REGNUM)
1222           (match_dup 1)))]
1223   "TARGET_SVE && TARGET_NON_STREAMING"
1224   {@ [ cons: =0, 1   ; attrs: pred_clobber ]
1225      [ &Upa    , Upa ; yes                 ] rdffrs\t%0.b, %1/z
1226      [ ?Upa    , 0Upa; yes                 ] ^
1227      [ Upa     , Upa ; no                  ] ^
1228   }
1229 )
1230
1231 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1232 (define_insn "*aarch64_rdffr_cc"
1233   [(set (reg:CC_NZC CC_REGNUM)
1234         (unspec:CC_NZC
1235           [(match_operand:VNx16BI 1 "register_operand")
1236            (match_dup 1)
1237            (const_int SVE_KNOWN_PTRUE)
1238            (reg:VNx16BI FFRT_REGNUM)]
1239           UNSPEC_PTEST))
1240    (set (match_operand:VNx16BI 0 "register_operand")
1241         (reg:VNx16BI FFRT_REGNUM))]
1242   "TARGET_SVE && TARGET_NON_STREAMING"
1243   {@ [ cons: =0, 1   ; attrs: pred_clobber ]
1244      [ &Upa    , Upa ; yes                 ] rdffrs\t%0.b, %1/z
1245      [ ?Upa    , 0Upa; yes                 ] ^
1246      [ Upa     , Upa ; no                  ] ^
1247   }
1248 )
1249
1250 ;; [R3 in the block comment above about FFR handling]
1251 ;;
1252 ;; Arbitrarily update the FFRT after a read from the FFR.  This acts as
1253 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
1254 (define_insn "aarch64_update_ffrt"
1255   [(set (reg:VNx16BI FFRT_REGNUM)
1256         (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1257   "TARGET_SVE"
1258   ""
1259   [(set_attr "type" "no_insn")]
1260 )
1261
1262 ;; =========================================================================
1263 ;; == Loads
1264 ;; =========================================================================
1265
1266 ;; -------------------------------------------------------------------------
1267 ;; ---- Normal contiguous loads
1268 ;; -------------------------------------------------------------------------
1269 ;; Includes contiguous forms of:
1270 ;; - LD1B
1271 ;; - LD1D
1272 ;; - LD1H
1273 ;; - LD1W
1274 ;; - LD2B
1275 ;; - LD2D
1276 ;; - LD2H
1277 ;; - LD2W
1278 ;; - LD3B
1279 ;; - LD3D
1280 ;; - LD3H
1281 ;; - LD3W
1282 ;; - LD4B
1283 ;; - LD4D
1284 ;; - LD4H
1285 ;; - LD4W
1286 ;; -------------------------------------------------------------------------
1287
1288 ;; Predicated LD1 (single).
1289 (define_insn "maskload<mode><vpred>"
1290   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1291         (unspec:SVE_ALL
1292           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1293            (match_operand:SVE_ALL 1 "memory_operand" "m")
1294            (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")]
1295           UNSPEC_LD1_SVE))]
1296   "TARGET_SVE"
1297   "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1298 )
1299
1300 ;; Unpredicated LD[234].
1301 (define_expand "vec_load_lanes<mode><vsingle>"
1302   [(set (match_operand:SVE_STRUCT 0 "register_operand")
1303         (unspec:SVE_STRUCT
1304           [(match_dup 2)
1305            (match_operand:SVE_STRUCT 1 "memory_operand")
1306            (match_dup 3)]
1307           UNSPEC_LDN))]
1308   "TARGET_SVE"
1309   {
1310     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1311     operands[3] = CONST0_RTX (<MODE>mode);
1312   }
1313 )
1314
1315 ;; Predicated LD[234].
1316 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1317   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1318         (unspec:SVE_STRUCT
1319           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1320            (match_operand:SVE_STRUCT 1 "memory_operand" "m")
1321            (match_operand 3 "aarch64_maskload_else_operand")]
1322           UNSPEC_LDN))]
1323   "TARGET_SVE"
1324   "ld<vector_count><Vesize>\t%0, %2/z, %1"
1325 )
1326
1327 ;; -------------------------------------------------------------------------
1328 ;; ---- Extending contiguous loads
1329 ;; -------------------------------------------------------------------------
1330 ;; Includes contiguous forms of:
1331 ;; LD1B
1332 ;; LD1H
1333 ;; LD1SB
1334 ;; LD1SH
1335 ;; LD1SW
1336 ;; LD1W
1337 ;; -------------------------------------------------------------------------
1338
1339 ;; Predicated load and extend, with 8 elements per 128-bit block.
1340 (define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1341   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1342         (unspec:SVE_HSDI
1343           [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1344            (ANY_EXTEND:SVE_HSDI
1345              (unspec:SVE_PARTIAL_I
1346                [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1347                 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")
1348                 (match_operand:SVE_PARTIAL_I 4 "aarch64_maskload_else_operand")]
1349                UNSPEC_LD1_SVE))]
1350           UNSPEC_PRED_X))]
1351   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1352   "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1353   "&& !CONSTANT_P (operands[3])"
1354   {
1355     operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1356   }
1357 )
1358
1359 ;; Same as above without the maskload_else_operand to still allow combine to
1360 ;; match a sign-extended pred_mov pattern.
1361 (define_insn_and_rewrite "*aarch64_load_<ANY_EXTEND:optab>_mov<SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1362   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1363         (unspec:SVE_HSDI
1364           [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1365            (ANY_EXTEND:SVE_HSDI
1366              (unspec:SVE_PARTIAL_I
1367                [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1368                 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1369                 UNSPEC_PRED_X))]
1370            UNSPEC_PRED_X))]
1371   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1372   "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1373   "&& !CONSTANT_P (operands[3])"
1374   {
1375     operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1376   }
1377 )
1378
1379 ;; -------------------------------------------------------------------------
1380 ;; ---- First-faulting contiguous loads
1381 ;; -------------------------------------------------------------------------
1382 ;; Includes contiguous forms of:
1383 ;; - LDFF1B
1384 ;; - LDFF1D
1385 ;; - LDFF1H
1386 ;; - LDFF1W
1387 ;; - LDNF1B
1388 ;; - LDNF1D
1389 ;; - LDNF1H
1390 ;; - LDNF1W
1391 ;; -------------------------------------------------------------------------
1392
1393 ;; Contiguous non-extending first-faulting or non-faulting loads.
1394 (define_insn "@aarch64_ld<fn>f1<mode>"
1395   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1396         (unspec:SVE_FULL
1397           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1398            (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1399            (reg:VNx16BI FFRT_REGNUM)]
1400           SVE_LDFF1_LDNF1))]
1401   "TARGET_SVE && TARGET_NON_STREAMING"
1402   "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1403 )
1404
1405 ;; -------------------------------------------------------------------------
1406 ;; ---- First-faulting extending contiguous loads
1407 ;; -------------------------------------------------------------------------
1408 ;; Includes contiguous forms of:
1409 ;; - LDFF1B
1410 ;; - LDFF1H
1411 ;; - LDFF1SB
1412 ;; - LDFF1SH
1413 ;; - LDFF1SW
1414 ;; - LDFF1W
1415 ;; - LDNF1B
1416 ;; - LDNF1H
1417 ;; - LDNF1SB
1418 ;; - LDNF1SH
1419 ;; - LDNF1SW
1420 ;; - LDNF1W
1421 ;; -------------------------------------------------------------------------
1422
1423 ;; Predicated first-faulting or non-faulting load and extend.
1424 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1425   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1426         (unspec:SVE_HSDI
1427           [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1428            (ANY_EXTEND:SVE_HSDI
1429              (unspec:SVE_PARTIAL_I
1430                [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1431                 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1432                 (reg:VNx16BI FFRT_REGNUM)]
1433                SVE_LDFF1_LDNF1))]
1434           UNSPEC_PRED_X))]
1435   "TARGET_SVE
1436    && TARGET_NON_STREAMING
1437    && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1438   "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1439   "&& !CONSTANT_P (operands[3])"
1440   {
1441     operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1442   }
1443 )
1444
1445 ;; -------------------------------------------------------------------------
1446 ;; ---- Non-temporal contiguous loads
1447 ;; -------------------------------------------------------------------------
1448 ;; Includes:
1449 ;; - LDNT1B
1450 ;; - LDNT1D
1451 ;; - LDNT1H
1452 ;; - LDNT1W
1453 ;; -------------------------------------------------------------------------
1454
1455 ;; Predicated contiguous non-temporal load (single).
1456 (define_insn "@aarch64_ldnt1<mode>"
1457   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1458         (unspec:SVE_FULL
1459           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1460            (match_operand:SVE_FULL 1 "memory_operand" "m")
1461            (match_operand:SVE_FULL 3 "aarch64_maskload_else_operand")]
1462           UNSPEC_LDNT1_SVE))]
1463   "TARGET_SVE"
1464   "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1465 )
1466
1467 ;; -------------------------------------------------------------------------
1468 ;; ---- Normal gather loads
1469 ;; -------------------------------------------------------------------------
1470 ;; Includes gather forms of:
1471 ;; - LD1D
1472 ;; - LD1W
1473 ;; -------------------------------------------------------------------------
1474
1475 ;; Unpredicated gather loads.
1476 (define_expand "gather_load<mode><v_int_container>"
1477   [(set (match_operand:SVE_24 0 "register_operand")
1478         (unspec:SVE_24
1479           [(match_dup 5)
1480            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1481            (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1482            (match_operand:DI 3 "const_int_operand")
1483            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1484            (match_dup 6)
1485            (mem:BLK (scratch))]
1486           UNSPEC_LD1_GATHER))]
1487   "TARGET_SVE && TARGET_NON_STREAMING"
1488   {
1489     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1490     operands[6] = CONST0_RTX (<MODE>mode);
1491   }
1492 )
1493
1494 ;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
1495 ;; unsigned extension and false for signed extension.
1496 (define_insn "mask_gather_load<mode><v_int_container>"
1497   [(set (match_operand:SVE_4 0 "register_operand")
1498         (unspec:SVE_4
1499           [(match_operand:VNx4BI 5 "register_operand")
1500            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1501            (match_operand:VNx4SI 2 "register_operand")
1502            (match_operand:DI 3 "const_int_operand")
1503            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1504            (match_operand:SVE_4 6 "aarch64_maskload_else_operand")
1505            (mem:BLK (scratch))]
1506           UNSPEC_LD1_GATHER))]
1507   "TARGET_SVE && TARGET_NON_STREAMING"
1508   {@ [cons: =0, 1, 2, 3, 4, 5  ]
1509      [&w, Z,   w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
1510      [?w, Z,   0, Ui1, Ui1, Upl] ^
1511      [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1512      [?w, vgw, 0, Ui1, Ui1, Upl] ^
1513      [&w, rk,  w, Z,   Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1514      [?w, rk,  0, Z,   Ui1, Upl] ^
1515      [&w, rk,  w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1516      [?w, rk,  0, Ui1, Ui1, Upl] ^
1517      [&w, rk,  w, Z,   i,   Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1518      [?w, rk,  0, Z,   i,   Upl] ^
1519      [&w, rk,  w, Ui1, i,   Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1520      [?w, rk,  0, Ui1, i,   Upl] ^
1521   }
1522 )
1523
1524 ;; Predicated gather loads for 64-bit elements.  The value of operand 3
1525 ;; doesn't matter in this case.
1526 (define_insn "mask_gather_load<mode><v_int_container>"
1527   [(set (match_operand:SVE_2 0 "register_operand")
1528         (unspec:SVE_2
1529           [(match_operand:VNx2BI 5 "register_operand")
1530            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1531            (match_operand:VNx2DI 2 "register_operand")
1532            (match_operand:DI 3 "const_int_operand")
1533            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1534            (match_operand:SVE_2 6 "aarch64_maskload_else_operand")
1535            (mem:BLK (scratch))]
1536           UNSPEC_LD1_GATHER))]
1537   "TARGET_SVE && TARGET_NON_STREAMING"
1538   {@ [cons: =0, 1, 2, 3, 4, 5]
1539      [&w, Z,   w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
1540      [?w, Z,   0, i, Ui1, Upl] ^
1541      [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1542      [?w, vgd, 0, i, Ui1, Upl] ^
1543      [&w, rk,  w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1544      [?w, rk,  0, i, Ui1, Upl] ^
1545      [&w, rk,  w, i, i,   Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1546      [?w, rk,  0, i, i,   Upl] ^
1547   }
1548 )
1549
1550 ;; Likewise, but with the offset being extended from 32 bits.
1551 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1552   [(set (match_operand:SVE_2 0 "register_operand")
1553         (unspec:SVE_2
1554           [(match_operand:VNx2BI 5 "register_operand")
1555            (match_operand:DI 1 "register_operand")
1556            (unspec:VNx2DI
1557              [(match_operand 6)
1558               (ANY_EXTEND:VNx2DI
1559                 (match_operand:VNx2SI 2 "register_operand"))]
1560              UNSPEC_PRED_X)
1561            (match_operand:DI 3 "const_int_operand")
1562            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1563            (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
1564            (mem:BLK (scratch))]
1565           UNSPEC_LD1_GATHER))]
1566   "TARGET_SVE && TARGET_NON_STREAMING"
1567   {@ [cons: =0, 1, 2, 3, 4, 5]
1568      [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1569      [?w, rk, 0, i, Ui1, Upl ] ^
1570      [&w, rk, w, i, i,   Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]
1571      [?w, rk, 0, i, i,   Upl ] ^
1572   }
1573   "&& !CONSTANT_P (operands[6])"
1574   {
1575     operands[6] = CONSTM1_RTX (VNx2BImode);
1576   }
1577 )
1578
1579 ;; Likewise, but with the offset being truncated to 32 bits and then
1580 ;; sign-extended.
1581 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1582   [(set (match_operand:SVE_2 0 "register_operand")
1583         (unspec:SVE_2
1584           [(match_operand:VNx2BI 5 "register_operand")
1585            (match_operand:DI 1 "register_operand")
1586            (unspec:VNx2DI
1587              [(match_operand 6)
1588               (sign_extend:VNx2DI
1589                 (truncate:VNx2SI
1590                   (match_operand:VNx2DI 2 "register_operand")))]
1591              UNSPEC_PRED_X)
1592            (match_operand:DI 3 "const_int_operand")
1593            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1594            (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
1595            (mem:BLK (scratch))]
1596           UNSPEC_LD1_GATHER))]
1597   "TARGET_SVE && TARGET_NON_STREAMING"
1598   {@ [cons: =0, 1, 2, 3, 4, 5]
1599      [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1600      [?w, rk, 0, i, Ui1, Upl ] ^
1601      [&w, rk, w, i, i,   Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1602      [?w, rk, 0, i, i,   Upl ] ^
1603   }
1604   "&& !CONSTANT_P (operands[6])"
1605   {
1606     operands[6] = CONSTM1_RTX (VNx2BImode);
1607   }
1608 )
1609
1610 ;; Likewise, but with the offset being truncated to 32 bits and then
1611 ;; zero-extended.
1612 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1613   [(set (match_operand:SVE_2 0 "register_operand")
1614         (unspec:SVE_2
1615           [(match_operand:VNx2BI 5 "register_operand")
1616            (match_operand:DI 1 "register_operand")
1617            (and:VNx2DI
1618              (match_operand:VNx2DI 2 "register_operand")
1619              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1620            (match_operand:DI 3 "const_int_operand")
1621            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1622            (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
1623            (mem:BLK (scratch))]
1624           UNSPEC_LD1_GATHER))]
1625   "TARGET_SVE && TARGET_NON_STREAMING"
1626   {@ [cons: =0, 1, 2, 3, 4, 5]
1627      [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1628      [?w, rk, 0, i, Ui1, Upl ] ^
1629      [&w, rk, w, i, i,   Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1630      [?w, rk, 0, i, i,   Upl ] ^
1631   }
1632 )
1633
1634 ;; -------------------------------------------------------------------------
1635 ;; ---- Extending gather loads
1636 ;; -------------------------------------------------------------------------
1637 ;; Includes gather forms of:
1638 ;; - LD1B
1639 ;; - LD1H
1640 ;; - LD1SB
1641 ;; - LD1SH
1642 ;; - LD1SW
1643 ;; - LD1W
1644 ;; -------------------------------------------------------------------------
1645
1646 ;; Predicated extending gather loads for 32-bit elements.  Operand 3 is
1647 ;; true for unsigned extension and false for signed extension.
1648 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1649   [(set (match_operand:SVE_4HSI 0 "register_operand")
1650         (unspec:SVE_4HSI
1651           [(match_operand:VNx4BI 6 "general_operand")
1652            (ANY_EXTEND:SVE_4HSI
1653              (unspec:SVE_4BHI
1654                [(match_operand:VNx4BI 5 "register_operand")
1655                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>")
1656                 (match_operand:VNx4SI 2 "register_operand")
1657                 (match_operand:DI 3 "const_int_operand")
1658                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>")
1659                 (match_operand:SVE_4BHI 7 "aarch64_maskload_else_operand")
1660                 (mem:BLK (scratch))]
1661                UNSPEC_LD1_GATHER))]
1662           UNSPEC_PRED_X))]
1663   "TARGET_SVE
1664    && TARGET_NON_STREAMING
1665    && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1666   {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1667      [&w, Z,                   w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1668      [?w, Z,                   0, Ui1, Ui1, Upl, UplDnm] ^
1669      [&w, vg<SVE_4BHI:Vesize>, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1670      [?w, vg<SVE_4BHI:Vesize>, 0, Ui1, Ui1, Upl, UplDnm] ^
1671      [&w, rk,                  w, Z,   Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1672      [?w, rk,                  0, Z,   Ui1, Upl, UplDnm] ^
1673      [&w, rk,                  w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1674      [?w, rk,                  0, Ui1, Ui1, Upl, UplDnm] ^
1675      [&w, rk,                  w, Z,   i,   Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1676      [?w, rk,                  0, Z,   i,   Upl, UplDnm] ^
1677      [&w, rk,                  w, Ui1, i,   Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1678      [?w, rk,                  0, Ui1, i,   Upl, UplDnm] ^
1679   }
1680   "&& !CONSTANT_P (operands[6])"
1681   {
1682     operands[6] = CONSTM1_RTX (VNx4BImode);
1683   }
1684 )
1685
1686 ;; Predicated extending gather loads for 64-bit elements.  The value of
1687 ;; operand 3 doesn't matter in this case.
1688 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1689   [(set (match_operand:SVE_2HSDI 0 "register_operand")
1690         (unspec:SVE_2HSDI
1691           [(match_operand:VNx2BI 6 "general_operand")
1692            (ANY_EXTEND:SVE_2HSDI
1693              (unspec:SVE_2BHSI
1694                [(match_operand:VNx2BI 5 "register_operand")
1695                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>")
1696                 (match_operand:VNx2DI 2 "register_operand")
1697                 (match_operand:DI 3 "const_int_operand")
1698                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1699                 (match_operand:SVE_2BHSI 7 "aarch64_maskload_else_operand")
1700                 (mem:BLK (scratch))]
1701                UNSPEC_LD1_GATHER))]
1702           UNSPEC_PRED_X))]
1703   "TARGET_SVE
1704    && TARGET_NON_STREAMING
1705    && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1706   {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1707      [&w, Z,                    w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1708      [?w, Z,                    0, i, Ui1, Upl, UplDnm] ^
1709      [&w, vg<SVE_2BHSI:Vesize>, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1710      [?w, vg<SVE_2BHSI:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1711      [&w, rk,                   w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1712      [?w, rk,                   0, i, Ui1, Upl, UplDnm] ^
1713      [&w, rk,                   w, i, i,   Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1714      [?w, rk,                   0, i, i,   Upl, UplDnm] ^
1715   }
1716   "&& !CONSTANT_P (operands[6])"
1717   {
1718     operands[6] = CONSTM1_RTX (VNx2BImode);
1719   }
1720 )
1721
1722 ;; Likewise, but with the offset being extended from 32 bits.
1723 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1724   [(set (match_operand:SVE_2HSDI 0 "register_operand")
1725         (unspec:SVE_2HSDI
1726           [(match_operand 6)
1727            (ANY_EXTEND:SVE_2HSDI
1728              (unspec:SVE_2BHSI
1729                [(match_operand:VNx2BI 5 "register_operand")
1730                 (match_operand:DI 1 "aarch64_reg_or_zero")
1731                 (unspec:VNx2DI
1732                   [(match_operand 7)
1733                    (ANY_EXTEND2:VNx2DI
1734                      (match_operand:VNx2SI 2 "register_operand"))]
1735                   UNSPEC_PRED_X)
1736                 (match_operand:DI 3 "const_int_operand")
1737                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1738                 (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
1739                 (mem:BLK (scratch))]
1740                UNSPEC_LD1_GATHER))]
1741           UNSPEC_PRED_X))]
1742   "TARGET_SVE
1743    && TARGET_NON_STREAMING
1744    && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1745   {@ [cons: =0, 1, 2, 3, 4, 5]
1746      [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1747      [?w, rk, 0, i, Ui1, Upl ] ^
1748      [&w, rk, w, i, i,   Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]
1749      [?w, rk, 0, i, i,   Upl ] ^
1750   }
1751   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1752   {
1753     operands[6] = CONSTM1_RTX (VNx2BImode);
1754     operands[7] = CONSTM1_RTX (VNx2BImode);
1755   }
1756 )
1757
1758 ;; Likewise, but with the offset being truncated to 32 bits and then
1759 ;; sign-extended.
1760 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1761   [(set (match_operand:SVE_2HSDI 0 "register_operand")
1762         (unspec:SVE_2HSDI
1763           [(match_operand 6)
1764            (ANY_EXTEND:SVE_2HSDI
1765              (unspec:SVE_2BHSI
1766                [(match_operand:VNx2BI 5 "register_operand")
1767                 (match_operand:DI 1 "aarch64_reg_or_zero")
1768                 (unspec:VNx2DI
1769                   [(match_operand 7)
1770                    (sign_extend:VNx2DI
1771                      (truncate:VNx2SI
1772                        (match_operand:VNx2DI 2 "register_operand")))]
1773                   UNSPEC_PRED_X)
1774                 (match_operand:DI 3 "const_int_operand")
1775                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1776                 (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
1777                 (mem:BLK (scratch))]
1778                UNSPEC_LD1_GATHER))]
1779           UNSPEC_PRED_X))]
1780   "TARGET_SVE
1781    && TARGET_NON_STREAMING
1782    && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1783   {@ [cons: =0, 1, 2, 3, 4, 5]
1784      [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1785      [?w, rk, 0, i, Ui1, Upl ] ^
1786      [&w, rk, w, i, i,   Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1787      [?w, rk, 0, i, i,   Upl ] ^
1788   }
1789   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1790   {
1791     operands[6] = CONSTM1_RTX (VNx2BImode);
1792     operands[7] = CONSTM1_RTX (VNx2BImode);
1793   }
1794 )
1795
1796 ;; Likewise, but with the offset being truncated to 32 bits and then
1797 ;; zero-extended.
1798 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1799   [(set (match_operand:SVE_2HSDI 0 "register_operand")
1800         (unspec:SVE_2HSDI
1801           [(match_operand 7)
1802            (ANY_EXTEND:SVE_2HSDI
1803              (unspec:SVE_2BHSI
1804                [(match_operand:VNx2BI 5 "register_operand")
1805                 (match_operand:DI 1 "aarch64_reg_or_zero")
1806                 (and:VNx2DI
1807                   (match_operand:VNx2DI 2 "register_operand")
1808                   (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1809                 (match_operand:DI 3 "const_int_operand")
1810                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1811                 (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
1812                 (mem:BLK (scratch))]
1813                UNSPEC_LD1_GATHER))]
1814           UNSPEC_PRED_X))]
1815   "TARGET_SVE
1816    && TARGET_NON_STREAMING
1817    && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1818   {@ [cons: =0, 1, 2, 3, 4, 5]
1819      [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1820      [?w, rk, 0, i, Ui1, Upl ] ^
1821      [&w, rk, w, i, i,   Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1822      [?w, rk, 0, i, i,   Upl ] ^
1823   }
1824   "&& !CONSTANT_P (operands[7])"
1825   {
1826     operands[7] = CONSTM1_RTX (VNx2BImode);
1827   }
1828 )
1829
1830 ;; -------------------------------------------------------------------------
1831 ;; ---- First-faulting gather loads
1832 ;; -------------------------------------------------------------------------
1833 ;; Includes gather forms of:
1834 ;; - LDFF1D
1835 ;; - LDFF1W
1836 ;; -------------------------------------------------------------------------
1837
1838 ;; Predicated first-faulting gather loads for 32-bit elements.  Operand
1839 ;; 3 is true for unsigned extension and false for signed extension.
1840 (define_insn "@aarch64_ldff1_gather<mode>"
1841   [(set (match_operand:SVE_FULL_S 0 "register_operand")
1842         (unspec:SVE_FULL_S
1843           [(match_operand:VNx4BI 5 "register_operand")
1844            (match_operand:DI 1 "aarch64_sve_gather_offset_w")
1845            (match_operand:VNx4SI 2 "register_operand")
1846            (match_operand:DI 3 "const_int_operand")
1847            (match_operand:DI 4 "aarch64_gather_scale_operand_w")
1848            (mem:BLK (scratch))
1849            (reg:VNx16BI FFRT_REGNUM)]
1850           UNSPEC_LDFF1_GATHER))]
1851   "TARGET_SVE && TARGET_NON_STREAMING"
1852   {@ [cons: =0, 1, 2, 3, 4, 5  ]
1853      [&w, Z,   w, i,   Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s]
1854      [?w, Z,   0, i,   Ui1, Upl] ^
1855      [&w, vgw, w, i,   Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s, #%1]
1856      [?w, vgw, 0, i,   Ui1, Upl] ^
1857      [&w, rk,  w, Z,   Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1858      [?w, rk,  0, Z,   Ui1, Upl] ^
1859      [&w, rk,  w, Ui1, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1860      [?w, rk,  0, Ui1, Ui1, Upl] ^
1861      [&w, rk,  w, Z,   i,   Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1862      [?w, rk,  0, Z,   i,   Upl] ^
1863      [&w, rk,  w, Ui1, i,   Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1864      [?w, rk,  0, Ui1, i,   Upl] ^
1865   }
1866 )
1867
1868 ;; Predicated first-faulting gather loads for 64-bit elements.  The value
1869 ;; of operand 3 doesn't matter in this case.
1870 (define_insn "@aarch64_ldff1_gather<mode>"
1871   [(set (match_operand:SVE_FULL_D 0 "register_operand")
1872         (unspec:SVE_FULL_D
1873           [(match_operand:VNx2BI 5 "register_operand")
1874            (match_operand:DI 1 "aarch64_sve_gather_offset_d")
1875            (match_operand:VNx2DI 2 "register_operand")
1876            (match_operand:DI 3 "const_int_operand")
1877            (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1878            (mem:BLK (scratch))
1879            (reg:VNx16BI FFRT_REGNUM)]
1880           UNSPEC_LDFF1_GATHER))]
1881   "TARGET_SVE && TARGET_NON_STREAMING"
1882   {@ [cons: =0, 1, 2, 3, 4, 5 ]
1883      [&w, Z,   w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d]
1884      [?w, Z,   0, i, Ui1, Upl ] ^
1885      [&w, vgd, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d, #%1]
1886      [?w, vgd, 0, i, Ui1, Upl ] ^
1887      [&w, rk,  w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d]
1888      [?w, rk,  0, i, Ui1, Upl ] ^
1889      [&w, rk,  w, i, i,   Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1890      [?w, rk,  0, i, i,   Upl ] ^
1891   }
1892 )
1893
1894 ;; Likewise, but with the offset being sign-extended from 32 bits.
1895 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1896   [(set (match_operand:SVE_FULL_D 0 "register_operand")
1897         (unspec:SVE_FULL_D
1898           [(match_operand:VNx2BI 5 "register_operand")
1899            (match_operand:DI 1 "register_operand")
1900            (unspec:VNx2DI
1901              [(match_operand 6)
1902               (sign_extend:VNx2DI
1903                 (truncate:VNx2SI
1904                   (match_operand:VNx2DI 2 "register_operand")))]
1905              UNSPEC_PRED_X)
1906            (match_operand:DI 3 "const_int_operand")
1907            (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1908            (mem:BLK (scratch))
1909            (reg:VNx16BI FFRT_REGNUM)]
1910           UNSPEC_LDFF1_GATHER))]
1911   "TARGET_SVE && TARGET_NON_STREAMING"
1912   {@ [cons: =0, 1, 2, 3, 4, 5]
1913      [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1914      [?w, rk, 0, i, Ui1, Upl ] ^
1915      [&w, rk, w, i, i,   Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1916      [?w, rk, 0, i, i,   Upl ] ^
1917   }
1918   "&& !CONSTANT_P (operands[6])"
1919   {
1920     operands[6] = CONSTM1_RTX (VNx2BImode);
1921   }
1922 )
1923
1924 ;; Likewise, but with the offset being zero-extended from 32 bits.
1925 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1926   [(set (match_operand:SVE_FULL_D 0 "register_operand")
1927         (unspec:SVE_FULL_D
1928           [(match_operand:VNx2BI 5 "register_operand")
1929            (match_operand:DI 1 "register_operand")
1930            (and:VNx2DI
1931              (match_operand:VNx2DI 2 "register_operand")
1932              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1933            (match_operand:DI 3 "const_int_operand")
1934            (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1935            (mem:BLK (scratch))
1936            (reg:VNx16BI FFRT_REGNUM)]
1937           UNSPEC_LDFF1_GATHER))]
1938   "TARGET_SVE && TARGET_NON_STREAMING"
1939   {@ [cons: =0, 1, 2, 3, 4, 5]
1940      [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1941      [?w, rk, 0, i, Ui1, Upl ] ^
1942      [&w, rk, w, i, i,   Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1943      [?w, rk, 0, i, i,   Upl ] ^
1944   }
1945 )
1946
1947 ;; -------------------------------------------------------------------------
1948 ;; ---- First-faulting extending gather loads
1949 ;; -------------------------------------------------------------------------
1950 ;; Includes gather forms of:
1951 ;; - LDFF1B
1952 ;; - LDFF1H
1953 ;; - LDFF1SB
1954 ;; - LDFF1SH
1955 ;; - LDFF1SW
1956 ;; - LDFF1W
1957 ;; -------------------------------------------------------------------------
1958
1959 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1960 ;; Operand 3 is true for unsigned extension and false for signed extension.
1961 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1962   [(set (match_operand:VNx4_WIDE 0 "register_operand")
1963         (unspec:VNx4_WIDE
1964           [(match_operand:VNx4BI 6 "general_operand")
1965            (ANY_EXTEND:VNx4_WIDE
1966              (unspec:VNx4_NARROW
1967                [(match_operand:VNx4BI 5 "register_operand")
1968                 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>")
1969                 (match_operand:VNx4_WIDE 2 "register_operand")
1970                 (match_operand:DI 3 "const_int_operand")
1971                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>")
1972                 (mem:BLK (scratch))
1973                 (reg:VNx16BI FFRT_REGNUM)]
1974                UNSPEC_LDFF1_GATHER))]
1975           UNSPEC_PRED_X))]
1976   "TARGET_SVE && TARGET_NON_STREAMING"
1977   {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1978      [&w, Z,                      w, i,   Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1979      [?w, Z,                      0, i,   Ui1, Upl, UplDnm] ^
1980      [&w, vg<VNx4_NARROW:Vesize>, w, i,   Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1981      [?w, vg<VNx4_NARROW:Vesize>, 0, i,   Ui1, Upl, UplDnm] ^
1982      [&w, rk,                     w, Z,   Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1983      [?w, rk,                     0, Z,   Ui1, Upl, UplDnm] ^
1984      [&w, rk,                     w, Ui1, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1985      [?w, rk,                     0, Ui1, Ui1, Upl, UplDnm] ^
1986      [&w, rk,                     w, Z,   i,   Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1987      [?w, rk,                     0, Z,   i,   Upl, UplDnm] ^
1988      [&w, rk,                     w, Ui1, i,   Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1989      [?w, rk,                     0, Ui1, i,   Upl, UplDnm] ^
1990   }
1991   "&& !CONSTANT_P (operands[6])"
1992   {
1993     operands[6] = CONSTM1_RTX (VNx4BImode);
1994   }
1995 )
1996
1997 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1998 ;; The value of operand 3 doesn't matter in this case.
1999 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
2000   [(set (match_operand:VNx2_WIDE 0 "register_operand")
2001         (unspec:VNx2_WIDE
2002           [(match_operand:VNx2BI 6 "general_operand")
2003            (ANY_EXTEND:VNx2_WIDE
2004              (unspec:VNx2_NARROW
2005                [(match_operand:VNx2BI 5 "register_operand")
2006                 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>")
2007                 (match_operand:VNx2_WIDE 2 "register_operand")
2008                 (match_operand:DI 3 "const_int_operand")
2009                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2010                 (mem:BLK (scratch))
2011                 (reg:VNx16BI FFRT_REGNUM)]
2012                UNSPEC_LDFF1_GATHER))]
2013           UNSPEC_PRED_X))]
2014   "TARGET_SVE && TARGET_NON_STREAMING"
2015   {@ [cons: =0, 1, 2, 3, 4, 5, 6]
2016      [&w, Z,                      w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
2017      [?w, Z,                      0, i, Ui1, Upl, UplDnm] ^
2018      [&w, vg<VNx2_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
2019      [?w, vg<VNx2_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
2020      [&w, rk,                     w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
2021      [?w, rk,                     0, i, Ui1, Upl, UplDnm] ^
2022      [&w, rk,                     w, i, i,   Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
2023      [?w, rk,                     w, i, i,   Upl, UplDnm] ^
2024   }
2025   "&& !CONSTANT_P (operands[6])"
2026   {
2027     operands[6] = CONSTM1_RTX (VNx2BImode);
2028   }
2029 )
2030
2031 ;; Likewise, but with the offset being sign-extended from 32 bits.
2032 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
2033   [(set (match_operand:VNx2_WIDE 0 "register_operand")
2034         (unspec:VNx2_WIDE
2035           [(match_operand 6)
2036            (ANY_EXTEND:VNx2_WIDE
2037              (unspec:VNx2_NARROW
2038                [(match_operand:VNx2BI 5 "register_operand")
2039                 (match_operand:DI 1 "aarch64_reg_or_zero")
2040                 (unspec:VNx2DI
2041                   [(match_operand 7)
2042                    (sign_extend:VNx2DI
2043                      (truncate:VNx2SI
2044                        (match_operand:VNx2DI 2 "register_operand")))]
2045                   UNSPEC_PRED_X)
2046                 (match_operand:DI 3 "const_int_operand")
2047                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2048                 (mem:BLK (scratch))
2049                 (reg:VNx16BI FFRT_REGNUM)]
2050                UNSPEC_LDFF1_GATHER))]
2051           UNSPEC_PRED_X))]
2052   "TARGET_SVE && TARGET_NON_STREAMING"
2053   {@ [cons: =0, 1, 2, 3, 4, 5]
2054      [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
2055      [?w, rk, 0, i, Ui1, Upl ] ^
2056      [&w, rk, w, i, i,   Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
2057      [?w, rk, 0, i, i,   Upl ] ^
2058   }
2059   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
2060   {
2061     operands[6] = CONSTM1_RTX (VNx2BImode);
2062     operands[7] = CONSTM1_RTX (VNx2BImode);
2063   }
2064 )
2065
2066 ;; Likewise, but with the offset being zero-extended from 32 bits.
2067 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
2068   [(set (match_operand:VNx2_WIDE 0 "register_operand")
2069         (unspec:VNx2_WIDE
2070           [(match_operand 7)
2071            (ANY_EXTEND:VNx2_WIDE
2072              (unspec:VNx2_NARROW
2073                [(match_operand:VNx2BI 5 "register_operand")
2074                 (match_operand:DI 1 "aarch64_reg_or_zero")
2075                 (and:VNx2DI
2076                   (match_operand:VNx2DI 2 "register_operand")
2077                   (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2078                 (match_operand:DI 3 "const_int_operand")
2079                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2080                 (mem:BLK (scratch))
2081                 (reg:VNx16BI FFRT_REGNUM)]
2082                UNSPEC_LDFF1_GATHER))]
2083           UNSPEC_PRED_X))]
2084   "TARGET_SVE && TARGET_NON_STREAMING"
2085   {@ [cons: =0, 1, 2, 3, 4, 5]
2086      [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
2087      [?w, rk, 0, i, Ui1, Upl ] ^
2088      [&w, rk, w, i, i,   Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
2089      [?w, rk, 0, i, i,   Upl ] ^
2090   }
2091   "&& !CONSTANT_P (operands[7])"
2092   {
2093     operands[7] = CONSTM1_RTX (VNx2BImode);
2094   }
2095 )
2096
2097 ;; =========================================================================
2098 ;; == Prefetches
2099 ;; =========================================================================
2100
2101 ;; -------------------------------------------------------------------------
2102 ;; ---- Contiguous prefetches
2103 ;; -------------------------------------------------------------------------
2104 ;; Includes contiguous forms of:
2105 ;; - PRFB
2106 ;; - PRFD
2107 ;; - PRFH
2108 ;; - PRFW
2109 ;; -------------------------------------------------------------------------
2110
2111 ;; Contiguous predicated prefetches.  Operand 2 gives the real prefetch
2112 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
2113 ;; information.
2114 (define_insn "@aarch64_sve_prefetch<mode>"
2115   [(prefetch (unspec:DI
2116                [(match_operand:<VPRED> 0 "register_operand" "Upl")
2117                 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
2118                 (match_operand:DI 2 "const_int_operand")]
2119                UNSPEC_SVE_PREFETCH)
2120              (match_operand:DI 3 "const_int_operand")
2121              (match_operand:DI 4 "const_int_operand"))]
2122   "TARGET_SVE"
2123   {
2124     operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
2125     return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
2126   }
2127 )
2128
2129 ;; -------------------------------------------------------------------------
2130 ;; ---- Gather prefetches
2131 ;; -------------------------------------------------------------------------
2132 ;; Includes gather forms of:
2133 ;; - PRFB
2134 ;; - PRFD
2135 ;; - PRFH
2136 ;; - PRFW
2137 ;; -------------------------------------------------------------------------
2138
2139 ;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
2140 ;; are:
2141 ;; 0: the governing predicate
2142 ;; 1: the scalar component of the address
2143 ;; 2: the vector component of the address
2144 ;; 3: 1 for zero extension, 0 for sign extension
2145 ;; 4: the scale multiplier
2146 ;; 5: a vector zero that identifies the mode of data being accessed
2147 ;; 6: the prefetch operator (an svprfop)
2148 ;; 7: the normal RTL prefetch rw flag
2149 ;; 8: the normal RTL prefetch locality value
2150 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
2151   [(prefetch (unspec:DI
2152                [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2153                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
2154                 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
2155                 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
2156                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2157                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2158                 (match_operand:DI 6 "const_int_operand")]
2159                UNSPEC_SVE_PREFETCH_GATHER)
2160              (match_operand:DI 7 "const_int_operand")
2161              (match_operand:DI 8 "const_int_operand"))]
2162   "TARGET_SVE && TARGET_NON_STREAMING"
2163   {
2164     static const char *const insns[][2] = {
2165       "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
2166       "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
2167       "prfb", "%0, [%1, %2.s, sxtw]",
2168       "prfb", "%0, [%1, %2.s, uxtw]",
2169       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
2170       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
2171     };
2172     const char *const *parts = insns[which_alternative];
2173     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2174   }
2175 )
2176
2177 ;; Predicated gather prefetches for 64-bit elements.  The value of operand 3
2178 ;; doesn't matter in this case.
2179 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
2180   [(prefetch (unspec:DI
2181                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
2182                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
2183                 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
2184                 (match_operand:DI 3 "const_int_operand")
2185                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
2186                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2187                 (match_operand:DI 6 "const_int_operand")]
2188                UNSPEC_SVE_PREFETCH_GATHER)
2189              (match_operand:DI 7 "const_int_operand")
2190              (match_operand:DI 8 "const_int_operand"))]
2191   "TARGET_SVE && TARGET_NON_STREAMING"
2192   {
2193     static const char *const insns[][2] = {
2194       "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
2195       "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
2196       "prfb", "%0, [%1, %2.d]",
2197       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
2198     };
2199     const char *const *parts = insns[which_alternative];
2200     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2201   }
2202 )
2203
2204 ;; Likewise, but with the offset being sign-extended from 32 bits.
2205 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
2206   [(prefetch (unspec:DI
2207                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2208                 (match_operand:DI 1 "register_operand" "rk, rk")
2209                 (unspec:VNx2DI_ONLY
2210                   [(match_operand 9)
2211                    (sign_extend:VNx2DI
2212                      (truncate:VNx2SI
2213                        (match_operand:VNx2DI 2 "register_operand" "w, w")))]
2214                   UNSPEC_PRED_X)
2215                 (match_operand:DI 3 "const_int_operand")
2216                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2217                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2218                 (match_operand:DI 6 "const_int_operand")]
2219                UNSPEC_SVE_PREFETCH_GATHER)
2220              (match_operand:DI 7 "const_int_operand")
2221              (match_operand:DI 8 "const_int_operand"))]
2222   "TARGET_SVE && TARGET_NON_STREAMING"
2223   {
2224     static const char *const insns[][2] = {
2225       "prfb", "%0, [%1, %2.d, sxtw]",
2226       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
2227     };
2228     const char *const *parts = insns[which_alternative];
2229     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2230   }
2231   "&& !rtx_equal_p (operands[0], operands[9])"
2232   {
2233     operands[9] = copy_rtx (operands[0]);
2234   }
2235 )
2236
2237 ;; Likewise, but with the offset being zero-extended from 32 bits.
2238 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
2239   [(prefetch (unspec:DI
2240                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2241                 (match_operand:DI 1 "register_operand" "rk, rk")
2242                 (and:VNx2DI_ONLY
2243                   (match_operand:VNx2DI 2 "register_operand" "w, w")
2244                   (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
2245                 (match_operand:DI 3 "const_int_operand")
2246                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2247                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2248                 (match_operand:DI 6 "const_int_operand")]
2249                UNSPEC_SVE_PREFETCH_GATHER)
2250              (match_operand:DI 7 "const_int_operand")
2251              (match_operand:DI 8 "const_int_operand"))]
2252   "TARGET_SVE && TARGET_NON_STREAMING"
2253   {
2254     static const char *const insns[][2] = {
2255       "prfb", "%0, [%1, %2.d, uxtw]",
2256       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
2257     };
2258     const char *const *parts = insns[which_alternative];
2259     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2260   }
2261 )
2262
2263 ;; =========================================================================
2264 ;; == Stores
2265 ;; =========================================================================
2266
2267 ;; -------------------------------------------------------------------------
2268 ;; ---- Normal contiguous stores
2269 ;; -------------------------------------------------------------------------
2270 ;; Includes contiguous forms of:
2271 ;; - ST1B
2272 ;; - ST1D
2273 ;; - ST1H
2274 ;; - ST1W
2275 ;; - ST2B
2276 ;; - ST2D
2277 ;; - ST2H
2278 ;; - ST2W
2279 ;; - ST3B
2280 ;; - ST3D
2281 ;; - ST3H
2282 ;; - ST3W
2283 ;; - ST4B
2284 ;; - ST4D
2285 ;; - ST4H
2286 ;; - ST4W
2287 ;; -------------------------------------------------------------------------
2288
2289 ;; Predicated ST1 (single).
2290 (define_insn "maskstore<mode><vpred>"
2291   [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2292         (unspec:SVE_ALL
2293           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2294            (match_operand:SVE_ALL 1 "register_operand" "w")
2295            (match_dup 0)]
2296           UNSPEC_ST1_SVE))]
2297   "TARGET_SVE"
2298   "st1<Vesize>\t%1.<Vctype>, %2, %0"
2299 )
2300
2301 ;; Unpredicated ST[234].  This is always a full update, so the dependence
2302 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2303 ;; There doesn't seem to be any obvious benefit to treating the all-true
2304 ;; case differently though.  In particular, it's very unlikely that we'll
2305 ;; only find out during RTL that a store_lanes is dead.
2306 (define_expand "vec_store_lanes<mode><vsingle>"
2307   [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2308         (unspec:SVE_STRUCT
2309           [(match_dup 2)
2310            (match_operand:SVE_STRUCT 1 "register_operand")
2311            (match_dup 0)]
2312           UNSPEC_STN))]
2313   "TARGET_SVE"
2314   {
2315     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2316   }
2317 )
2318
2319 ;; Predicated ST[234].
2320 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2321   [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2322         (unspec:SVE_STRUCT
2323           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2324            (match_operand:SVE_STRUCT 1 "register_operand" "w")
2325            (match_dup 0)]
2326           UNSPEC_STN))]
2327   "TARGET_SVE"
2328   "st<vector_count><Vesize>\t%1, %2, %0"
2329 )
2330
2331 ;; -------------------------------------------------------------------------
2332 ;; ---- Truncating contiguous stores
2333 ;; -------------------------------------------------------------------------
2334 ;; Includes:
2335 ;; - ST1B
2336 ;; - ST1H
2337 ;; - ST1W
2338 ;; -------------------------------------------------------------------------
2339
2340 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2341 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2342   [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2343         (unspec:VNx8_NARROW
2344           [(match_operand:VNx8BI 2 "register_operand" "Upl")
2345            (truncate:VNx8_NARROW
2346              (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2347            (match_dup 0)]
2348           UNSPEC_ST1_SVE))]
2349   "TARGET_SVE"
2350   "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2351 )
2352
2353 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2354 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2355   [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2356         (unspec:VNx4_NARROW
2357           [(match_operand:VNx4BI 2 "register_operand" "Upl")
2358            (truncate:VNx4_NARROW
2359              (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2360            (match_dup 0)]
2361           UNSPEC_ST1_SVE))]
2362   "TARGET_SVE"
2363   "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2364 )
2365
2366 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2367 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2368   [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2369         (unspec:VNx2_NARROW
2370           [(match_operand:VNx2BI 2 "register_operand" "Upl")
2371            (truncate:VNx2_NARROW
2372              (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2373            (match_dup 0)]
2374           UNSPEC_ST1_SVE))]
2375   "TARGET_SVE"
2376   "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2377 )
2378
2379 ;; -------------------------------------------------------------------------
2380 ;; ---- Non-temporal contiguous stores
2381 ;; -------------------------------------------------------------------------
2382 ;; Includes:
2383 ;; - STNT1B
2384 ;; - STNT1D
2385 ;; - STNT1H
2386 ;; - STNT1W
2387 ;; -------------------------------------------------------------------------
2388
2389 (define_insn "@aarch64_stnt1<mode>"
2390   [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2391         (unspec:SVE_FULL
2392           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2393            (match_operand:SVE_FULL 1 "register_operand" "w")
2394            (match_dup 0)]
2395           UNSPEC_STNT1_SVE))]
2396   "TARGET_SVE"
2397   "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2398 )
2399
2400 ;; -------------------------------------------------------------------------
2401 ;; ---- Normal scatter stores
2402 ;; -------------------------------------------------------------------------
2403 ;; Includes scatter forms of:
2404 ;; - ST1D
2405 ;; - ST1W
2406 ;; -------------------------------------------------------------------------
2407
2408 ;; Unpredicated scatter stores.
2409 (define_expand "scatter_store<mode><v_int_container>"
2410   [(set (mem:BLK (scratch))
2411         (unspec:BLK
2412           [(match_dup 5)
2413            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2414            (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2415            (match_operand:DI 2 "const_int_operand")
2416            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2417            (match_operand:SVE_24 4 "register_operand")]
2418           UNSPEC_ST1_SCATTER))]
2419   "TARGET_SVE && TARGET_NON_STREAMING"
2420   {
2421     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2422   }
2423 )
2424
2425 ;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
2426 ;; unsigned extension and false for signed extension.
2427 (define_insn "mask_scatter_store<mode><v_int_container>"
2428   [(set (mem:BLK (scratch))
2429         (unspec:BLK
2430           [(match_operand:VNx4BI 5 "register_operand")
2431            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2432            (match_operand:VNx4SI 1 "register_operand")
2433            (match_operand:DI 2 "const_int_operand")
2434            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2435            (match_operand:SVE_4 4 "register_operand")]
2436           UNSPEC_ST1_SCATTER))]
2437   "TARGET_SVE && TARGET_NON_STREAMING"
2438   {@ [ cons: 0 , 1 , 2   , 3   , 4 , 5    ]
2439      [ Z       , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%1.s]
2440      [ vgw     , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2441      [ rk      , w , Z   , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2442      [ rk      , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2443      [ rk      , w , Z   , i   , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2444      [ rk      , w , Ui1 , i   , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2445   }
2446 )
2447
2448 ;; Predicated scatter stores for 64-bit elements.  The value of operand 2
2449 ;; doesn't matter in this case.
2450 (define_insn "mask_scatter_store<mode><v_int_container>"
2451   [(set (mem:BLK (scratch))
2452         (unspec:BLK
2453           [(match_operand:VNx2BI 5 "register_operand")
2454            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2455            (match_operand:VNx2DI 1 "register_operand")
2456            (match_operand:DI 2 "const_int_operand")
2457            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2458            (match_operand:SVE_2 4 "register_operand")]
2459           UNSPEC_ST1_SCATTER))]
2460   "TARGET_SVE && TARGET_NON_STREAMING"
2461   {@ [ cons: 0 , 1 , 3   , 4 , 5    ]
2462      [ Z       , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%1.d]
2463      [ vgd     , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2464      [ rk      , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2465      [ rk      , w , i   , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2466   }
2467 )
2468
2469 ;; Likewise, but with the offset being extended from 32 bits.
2470 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2471   [(set (mem:BLK (scratch))
2472         (unspec:BLK
2473           [(match_operand:VNx2BI 5 "register_operand")
2474            (match_operand:DI 0 "register_operand")
2475            (unspec:VNx2DI
2476              [(match_operand 6)
2477               (ANY_EXTEND:VNx2DI
2478                 (match_operand:VNx2SI 1 "register_operand"))]
2479              UNSPEC_PRED_X)
2480            (match_operand:DI 2 "const_int_operand")
2481            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2482            (match_operand:SVE_2 4 "register_operand")]
2483           UNSPEC_ST1_SCATTER))]
2484   "TARGET_SVE && TARGET_NON_STREAMING"
2485   {@ [ cons: 0 , 1 , 3   , 4 , 5    ]
2486      [ rk      , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2487      [ rk      , w , i   , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]
2488   }
2489   "&& !CONSTANT_P (operands[6])"
2490   {
2491     operands[6] = CONSTM1_RTX (<VPRED>mode);
2492   }
2493 )
2494
2495 ;; Likewise, but with the offset being truncated to 32 bits and then
2496 ;; sign-extended.
2497 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2498   [(set (mem:BLK (scratch))
2499         (unspec:BLK
2500           [(match_operand:VNx2BI 5 "register_operand")
2501            (match_operand:DI 0 "register_operand")
2502            (unspec:VNx2DI
2503              [(match_operand 6)
2504               (sign_extend:VNx2DI
2505                 (truncate:VNx2SI
2506                   (match_operand:VNx2DI 1 "register_operand")))]
2507              UNSPEC_PRED_X)
2508            (match_operand:DI 2 "const_int_operand")
2509            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2510            (match_operand:SVE_2 4 "register_operand")]
2511           UNSPEC_ST1_SCATTER))]
2512   "TARGET_SVE && TARGET_NON_STREAMING"
2513   {@ [ cons: 0 , 1 , 3   , 4 , 5    ]
2514      [ rk      , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2515      [ rk      , w , i   , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]
2516   }
2517   "&& !CONSTANT_P (operands[6])"
2518   {
2519     operands[6] = CONSTM1_RTX (<VPRED>mode);
2520   }
2521 )
2522
2523 ;; Likewise, but with the offset being truncated to 32 bits and then
2524 ;; zero-extended.
2525 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2526   [(set (mem:BLK (scratch))
2527         (unspec:BLK
2528           [(match_operand:VNx2BI 5 "register_operand")
2529            (match_operand:DI 0 "aarch64_reg_or_zero")
2530            (and:VNx2DI
2531              (match_operand:VNx2DI 1 "register_operand")
2532              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2533            (match_operand:DI 2 "const_int_operand")
2534            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2535            (match_operand:SVE_2 4 "register_operand")]
2536           UNSPEC_ST1_SCATTER))]
2537   "TARGET_SVE && TARGET_NON_STREAMING"
2538   {@ [ cons: 0 , 1 , 3   , 4 , 5    ]
2539      [ rk      , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2540      [ rk      , w , i   , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]
2541   }
2542 )
2543
2544 ;; -------------------------------------------------------------------------
2545 ;; ---- Truncating scatter stores
2546 ;; -------------------------------------------------------------------------
2547 ;; Includes scatter forms of:
2548 ;; - ST1B
2549 ;; - ST1H
2550 ;; - ST1W
2551 ;; -------------------------------------------------------------------------
2552
2553 ;; Predicated truncating scatter stores for 32-bit elements.  Operand 2 is
2554 ;; true for unsigned extension and false for signed extension.
2555 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2556   [(set (mem:BLK (scratch))
2557         (unspec:BLK
2558           [(match_operand:VNx4BI 5 "register_operand")
2559            (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2560            (match_operand:VNx4SI 1 "register_operand")
2561            (match_operand:DI 2 "const_int_operand")
2562            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2563            (truncate:VNx4_NARROW
2564              (match_operand:VNx4_WIDE 4 "register_operand"))]
2565           UNSPEC_ST1_SCATTER))]
2566   "TARGET_SVE && TARGET_NON_STREAMING"
2567   {@ [ cons: 1 , 2   , 4 , 5    ]
2568      [ w       , Ui1 , w , Upl  ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2569      [ w       , Ui1 , w , Upl  ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2570      [ w       , Z   , w , Upl  ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2571      [ w       , Ui1 , w , Upl  ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2572      [ w       , Z   , w , Upl  ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2573      [ w       , Ui1 , w , Upl  ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2574   }
2575 )
2576
2577 ;; Predicated truncating scatter stores for 64-bit elements.  The value of
2578 ;; operand 2 doesn't matter in this case.
2579 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2580   [(set (mem:BLK (scratch))
2581         (unspec:BLK
2582           [(match_operand:VNx2BI 5 "register_operand")
2583            (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2584            (match_operand:VNx2DI 1 "register_operand")
2585            (match_operand:DI 2 "const_int_operand")
2586            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2587            (truncate:VNx2_NARROW
2588              (match_operand:VNx2_WIDE 4 "register_operand"))]
2589           UNSPEC_ST1_SCATTER))]
2590   "TARGET_SVE && TARGET_NON_STREAMING"
2591   {@ [ cons: 1 , 4 , 5    ]
2592      [ w       , w , Upl  ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2593      [ w       , w , Upl  ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2594      [ w       , w , Upl  ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2595      [ w       , w , Upl  ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2596   }
2597 )
2598
2599 ;; Likewise, but with the offset being sign-extended from 32 bits.
2600 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2601   [(set (mem:BLK (scratch))
2602         (unspec:BLK
2603           [(match_operand:VNx2BI 5 "register_operand")
2604            (match_operand:DI 0 "register_operand")
2605            (unspec:VNx2DI
2606              [(match_operand 6)
2607               (sign_extend:VNx2DI
2608                 (truncate:VNx2SI
2609                   (match_operand:VNx2DI 1 "register_operand")))]
2610              UNSPEC_PRED_X)
2611            (match_operand:DI 2 "const_int_operand")
2612            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2613            (truncate:VNx2_NARROW
2614              (match_operand:VNx2_WIDE 4 "register_operand"))]
2615           UNSPEC_ST1_SCATTER))]
2616   "TARGET_SVE && TARGET_NON_STREAMING"
2617   {@ [ cons: 0 , 1 , 4 , 5    ]
2618      [ rk      , w , w , Upl  ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2619      [ rk      , w , w , Upl  ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]
2620   }
2621   "&& !rtx_equal_p (operands[5], operands[6])"
2622   {
2623     operands[6] = copy_rtx (operands[5]);
2624   }
2625 )
2626
2627 ;; Likewise, but with the offset being zero-extended from 32 bits.
2628 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2629   [(set (mem:BLK (scratch))
2630         (unspec:BLK
2631           [(match_operand:VNx2BI 5 "register_operand")
2632            (match_operand:DI 0 "aarch64_reg_or_zero")
2633            (and:VNx2DI
2634              (match_operand:VNx2DI 1 "register_operand")
2635              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2636            (match_operand:DI 2 "const_int_operand")
2637            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2638            (truncate:VNx2_NARROW
2639              (match_operand:VNx2_WIDE 4 "register_operand"))]
2640           UNSPEC_ST1_SCATTER))]
2641   "TARGET_SVE && TARGET_NON_STREAMING"
2642   {@ [ cons: 0 , 1 , 4 , 5    ]
2643      [ rk      , w , w , Upl  ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2644      [ rk      , w , w , Upl  ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]
2645   }
2646 )
2647
2648 ;; =========================================================================
2649 ;; == Vector creation
2650 ;; =========================================================================
2651
2652 ;; -------------------------------------------------------------------------
2653 ;; ---- [INT,FP] Duplicate element
2654 ;; -------------------------------------------------------------------------
2655 ;; Includes:
2656 ;; - DUP
2657 ;; - MOV
2658 ;; - LD1RB
2659 ;; - LD1RD
2660 ;; - LD1RH
2661 ;; - LD1RW
2662 ;; - LD1ROB (F64MM)
2663 ;; - LD1ROD (F64MM)
2664 ;; - LD1ROH (F64MM)
2665 ;; - LD1ROW (F64MM)
2666 ;; - LD1RQB
2667 ;; - LD1RQD
2668 ;; - LD1RQH
2669 ;; - LD1RQW
2670 ;; -------------------------------------------------------------------------
2671
2672 (define_expand "vec_duplicate<mode>"
2673   [(parallel
2674     [(set (match_operand:SVE_ALL 0 "register_operand")
2675           (vec_duplicate:SVE_ALL
2676             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2677      (clobber (scratch:VNx16BI))])]
2678   "TARGET_SVE"
2679   {
2680     if (MEM_P (operands[1]))
2681       {
2682         rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2683         emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2684                                        CONST0_RTX (<MODE>mode)));
2685         DONE;
2686       }
2687   }
2688 )
2689
2690 ;; Accept memory operands for the benefit of combine, and also in case
2691 ;; the scalar input gets spilled to memory during RA.  We want to split
2692 ;; the load at the first opportunity in order to allow the PTRUE to be
2693 ;; optimized with surrounding code.
2694 (define_insn_and_split "*vec_duplicate<mode>_reg"
2695   [(set (match_operand:SVE_ALL 0 "register_operand")
2696         (vec_duplicate:SVE_ALL
2697           (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2698    (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2699   "TARGET_SVE"
2700   {@ [ cons: =0 , 1   ; attrs: length ]
2701      [ w        , r   ; 4             ] mov\t%0.<Vetype>, %<vwcore>1
2702      [ w        , w   ; 4             ] mov\t%0.<Vetype>, %<Vetype>1
2703      [ w        , Uty ; 8             ] #
2704   }
2705   "&& MEM_P (operands[1])"
2706   [(const_int 0)]
2707   {
2708     if (GET_CODE (operands[2]) == SCRATCH)
2709       operands[2] = gen_reg_rtx (VNx16BImode);
2710     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2711     rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2712     emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2713                                    CONST0_RTX (<MODE>mode)));
2714     DONE;
2715   }
2716 )
2717
2718 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
2719 ;;
2720 ;; The addressing mode range of LD1RQ does not match the addressing mode
2721 ;; range of LDR Qn.  If the predicate enforced the LD1RQ range, we would
2722 ;; not be able to combine LDR Qns outside that range.  The predicate
2723 ;; therefore accepts all memory operands, with only the constraints
2724 ;; enforcing the actual restrictions.  If the instruction is split
2725 ;; before RA, we need to load invalid addresses into a temporary.
2726
2727 (define_insn_and_split "@aarch64_vec_duplicate_vq<mode>_le"
2728   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w")
2729         (vec_duplicate:SVE_FULL
2730           (match_operand:<V128> 1 "nonimmediate_operand" "w, UtQ")))
2731    (clobber (match_scratch:VNx16BI 2 "=X, Upl"))]
2732   "TARGET_SVE && !BYTES_BIG_ENDIAN"
2733   {
2734     switch (which_alternative)
2735       {
2736         case 0:
2737           operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2738           return "dup\t%0.q, %1.q[0]";
2739         case 1:
2740           return "#";
2741         default:
2742           gcc_unreachable ();
2743       }
2744   }
2745   "&& MEM_P (operands[1])"
2746   [(const_int 0)]
2747   {
2748     if (can_create_pseudo_p ()
2749         && !aarch64_sve_ld1rq_operand (operands[1], <V128>mode))
2750       operands[1] = force_reload_address (operands[1]);
2751     if (GET_CODE (operands[2]) == SCRATCH)
2752       operands[2] = gen_reg_rtx (VNx16BImode);
2753     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2754     rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2755     emit_insn (gen_aarch64_sve_ld1rq<mode> (operands[0], operands[1], gp));
2756     DONE;
2757   }
2758 )
2759
2760 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2761 ;; The SVE register layout puts memory lane N into (architectural)
2762 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2763 ;; lsb into the register lsb.  We therefore have to describe this in rtl
2764 ;; terms as a reverse of the V128 vector followed by a duplicate.
2765 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2766   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2767         (vec_duplicate:SVE_FULL
2768           (vec_select:<V128>
2769             (match_operand:<V128> 1 "register_operand" "w")
2770             (match_operand 2 "descending_int_parallel"))))]
2771   "TARGET_SVE
2772    && BYTES_BIG_ENDIAN
2773    && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2774                 GET_MODE_NUNITS (<V128>mode) - 1)"
2775   {
2776     operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2777     return "dup\t%0.q, %1.q[0]";
2778   }
2779 )
2780
2781 ;; This is used for vec_duplicate<mode>s from memory, but can also
2782 ;; be used by combine to optimize selects of a vec_duplicate<mode>
2783 ;; with zero.
2784 (define_insn "sve_ld1r<mode>"
2785   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2786         (unspec:SVE_ALL
2787           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2788            (vec_duplicate:SVE_ALL
2789              (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2790            (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2791           UNSPEC_SEL))]
2792   "TARGET_SVE"
2793   "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2794 )
2795
2796 ;; Load 128 bits from memory under predicate control and duplicate to
2797 ;; fill a vector.
2798 (define_insn "@aarch64_sve_ld1rq<mode>"
2799   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2800         (unspec:SVE_FULL
2801           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2802            (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2803           UNSPEC_LD1RQ))]
2804   "TARGET_SVE"
2805   {
2806     operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2807     return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2808   }
2809 )
2810
2811 (define_insn "@aarch64_sve_ld1ro<mode>"
2812   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2813         (unspec:SVE_FULL
2814           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2815            (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2816                                "UO<Vesize>")]
2817           UNSPEC_LD1RO))]
2818   "TARGET_SVE_F64MM && TARGET_NON_STREAMING"
2819   {
2820     operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2821     return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2822   }
2823 )
2824
2825 ;; -------------------------------------------------------------------------
2826 ;; ---- [INT,FP] Initialize from individual elements
2827 ;; -------------------------------------------------------------------------
2828 ;; Includes:
2829 ;; - INSR
2830 ;; -------------------------------------------------------------------------
2831
2832 (define_expand "vec_init<mode><Vel>"
2833   [(match_operand:SVE_FULL 0 "register_operand")
2834     (match_operand 1 "")]
2835   "TARGET_SVE"
2836   {
2837     aarch64_sve_expand_vector_init (operands[0], operands[1]);
2838     DONE;
2839   }
2840 )
2841
2842 ;; Vector constructor combining two half vectors { a, b }
2843 (define_expand "vec_init<mode><Vhalf>"
2844   [(match_operand:SVE_NO2E 0 "register_operand")
2845    (match_operand 1 "")]
2846   "TARGET_SVE"
2847   {
2848     aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]);
2849     DONE;
2850   }
2851 )
2852
2853 ;; Vector constructor combining four quad vectors { a, b, c, d }
2854 (define_expand "vec_init<mode><Vquad>"
2855   [(match_operand:SVE_NO4E 0 "register_operand")
2856    (match_operand 1 "")]
2857   "TARGET_SVE"
2858   {
2859     aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]);
2860     DONE;
2861   }
2862 )
2863
2864 ;; Vector constructor combining eight vectors { a, b, c, d, ... }
2865 (define_expand "vec_initvnx16qivnx2qi"
2866   [(match_operand:VNx16QI 0 "register_operand")
2867    (match_operand 1 "")]
2868   "TARGET_SVE"
2869   {
2870     aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]);
2871     DONE;
2872   }
2873 )
2874
2875 ;; Shift an SVE vector left and insert a scalar into element 0.
2876 (define_insn "vec_shl_insert_<mode>"
2877   [(set (match_operand:SVE_FULL 0 "register_operand")
2878         (unspec:SVE_FULL
2879           [(match_operand:SVE_FULL 1 "register_operand")
2880            (match_operand:<VEL> 2 "aarch64_reg_or_zero")]
2881           UNSPEC_INSR))]
2882   "TARGET_SVE"
2883   {@ [ cons: =0 , 1 , 2  ; attrs: movprfx ]
2884      [ ?w       , 0 , rZ ; *              ] insr\t%0.<Vetype>, %<vwcore>2
2885      [ w        , 0 , w  ; *              ] insr\t%0.<Vetype>, %<Vetype>2
2886      [ ??&w     , w , rZ ; yes            ] movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2887      [ ?&w      , w , w  ; yes            ] movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2
2888   }
2889 )
2890
2891 ;; -------------------------------------------------------------------------
2892 ;; ---- [INT] Linear series
2893 ;; -------------------------------------------------------------------------
2894 ;; Includes:
2895 ;; - INDEX
2896 ;; -------------------------------------------------------------------------
2897
2898 (define_insn "vec_series<mode>"
2899   [(set (match_operand:SVE_I 0 "register_operand")
2900         (vec_series:SVE_I
2901           (match_operand:<VEL> 1 "aarch64_sve_index_operand")
2902           (match_operand:<VEL> 2 "aarch64_sve_index_operand")))]
2903   "TARGET_SVE"
2904   {@ [ cons: =0 , 1   , 2    ]
2905      [ w        , Usi , r    ] index\t%0.<Vctype>, #%1, %<vccore>2
2906      [ w        , r   , Usi  ] index\t%0.<Vctype>, %<vccore>1, #%2
2907      [ w        , r   , r    ] index\t%0.<Vctype>, %<vccore>1, %<vccore>2
2908   }
2909 )
2910
2911 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2912 ;; of an INDEX instruction.
2913 (define_insn "*vec_series<mode>_plus"
2914   [(set (match_operand:SVE_I 0 "register_operand" "=w")
2915         (plus:SVE_I
2916           (vec_duplicate:SVE_I
2917             (match_operand:<VEL> 1 "register_operand" "r"))
2918           (match_operand:SVE_I 2 "immediate_operand")))]
2919   "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2920   {
2921     operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2922     return "index\t%0.<Vctype>, %<vccore>1, #%2";
2923   }
2924 )
2925
2926 ;; -------------------------------------------------------------------------
2927 ;; ---- [PRED] Duplicate element
2928 ;; -------------------------------------------------------------------------
2929 ;; The patterns in this section are synthetic.
2930 ;; -------------------------------------------------------------------------
2931
2932 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2933 ;; input into the top bit and using a WHILELO.  An alternative would be to
2934 ;; duplicate the input and do a compare with zero.
2935 (define_expand "vec_duplicate<mode>"
2936   [(set (match_operand:PRED_ALL 0 "register_operand")
2937         (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2938   "TARGET_SVE"
2939   {
2940     rtx tmp = gen_reg_rtx (DImode);
2941     rtx op1 = gen_lowpart (DImode, operands[1]);
2942     emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2943     emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2944     DONE;
2945   }
2946 )
2947
2948 ;; =========================================================================
2949 ;; == Vector decomposition
2950 ;; =========================================================================
2951
2952 ;; -------------------------------------------------------------------------
2953 ;; ---- [INT,FP] Extract index
2954 ;; -------------------------------------------------------------------------
2955 ;; Includes:
2956 ;; - DUP    (Advanced SIMD)
2957 ;; - DUP    (SVE)
2958 ;; - EXT    (SVE)
2959 ;; - ST1    (Advanced SIMD)
2960 ;; - UMOV   (Advanced SIMD)
2961 ;; -------------------------------------------------------------------------
2962
2963 (define_expand "vec_extract<mode><Vel>"
2964   [(set (match_operand:<VEL> 0 "register_operand")
2965         (vec_select:<VEL>
2966           (match_operand:SVE_FULL 1 "register_operand")
2967           (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2968   "TARGET_SVE"
2969   {
2970     poly_int64 val;
2971     if (poly_int_rtx_p (operands[2], &val)
2972         && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2973       {
2974         /* The last element can be extracted with a LASTB and a false
2975            predicate.  */
2976         rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2977         emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2978         DONE;
2979       }
2980     if (!CONST_INT_P (operands[2]))
2981       {
2982         /* Create an index with operand[2] as the base and -1 as the step.
2983            It will then be zero for the element we care about.  */
2984         rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2985         index = force_reg (<VEL_INT>mode, index);
2986         rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2987         emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2988
2989         /* Get a predicate that is true for only that element.  */
2990         rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2991         rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2992         rtx sel = gen_reg_rtx (<VPRED>mode);
2993         emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2994
2995         /* Select the element using LASTB.  */
2996         emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2997         DONE;
2998       }
2999   }
3000 )
3001
3002 ;; Extract element zero.  This is a special case because we want to force
3003 ;; the registers to be the same for the second alternative, and then
3004 ;; split the instruction into nothing after RA.
3005 (define_insn_and_split "*vec_extract<mode><Vel>_0"
3006   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3007         (vec_select:<VEL>
3008           (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
3009           (parallel [(const_int 0)])))]
3010   "TARGET_SVE"
3011   {
3012     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
3013     switch (which_alternative)
3014       {
3015         case 0:
3016           return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
3017         case 1:
3018           return "#";
3019         case 2:
3020           return "st1\\t{%1.<Vetype>}[0], %0";
3021         default:
3022           gcc_unreachable ();
3023       }
3024   }
3025   "&& reload_completed
3026    && REG_P (operands[0])
3027    && REGNO (operands[0]) == REGNO (operands[1])"
3028   [(const_int 0)]
3029   {
3030     emit_note (NOTE_INSN_DELETED);
3031     DONE;
3032   }
3033   [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
3034 )
3035
3036 ;; Extract an element from the Advanced SIMD portion of the register.
3037 ;; We don't just reuse the aarch64-simd.md pattern because we don't
3038 ;; want any change in lane number on big-endian targets.
3039 (define_insn "*vec_extract<mode><Vel>_v128"
3040   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3041         (vec_select:<VEL>
3042           (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
3043           (parallel [(match_operand:SI 2 "const_int_operand")])))]
3044   "TARGET_SVE
3045    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
3046   {
3047     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
3048     switch (which_alternative)
3049       {
3050         case 0:
3051           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3052         case 1:
3053           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3054         case 2:
3055           return "st1\\t{%1.<Vetype>}[%2], %0";
3056         default:
3057           gcc_unreachable ();
3058       }
3059   }
3060   [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
3061 )
3062
3063 ;; Extract an element in the range of DUP.  This pattern allows the
3064 ;; source and destination to be different.
3065 (define_insn "*vec_extract<mode><Vel>_dup"
3066   [(set (match_operand:<VEL> 0 "register_operand" "=w")
3067         (vec_select:<VEL>
3068           (match_operand:SVE_FULL 1 "register_operand" "w")
3069           (parallel [(match_operand:SI 2 "const_int_operand")])))]
3070   "TARGET_SVE
3071    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
3072   {
3073     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
3074     return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
3075   }
3076 )
3077
3078 ;; Extract an element outside the range of DUP.  This pattern requires the
3079 ;; source and destination to be the same.
3080 (define_insn "*vec_extract<mode><Vel>_ext"
3081   [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
3082         (vec_select:<VEL>
3083           (match_operand:SVE_FULL 1 "register_operand" "0, w")
3084           (parallel [(match_operand:SI 2 "const_int_operand")])))]
3085   "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
3086   {
3087     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
3088     operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
3089     return (which_alternative == 0
3090             ? "ext\t%0.b, %0.b, %0.b, #%2"
3091             : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
3092   }
3093   [(set_attr "movprfx" "*,yes")]
3094 )
3095
3096 ;; -------------------------------------------------------------------------
3097 ;; ---- [INT,FP] Extract active element
3098 ;; -------------------------------------------------------------------------
3099 ;; Includes:
3100 ;; - LASTA
3101 ;; - LASTB
3102 ;; -------------------------------------------------------------------------
3103
3104 ;; Extract the last active element of operand 1 into operand 0.
3105 ;; If no elements are active, extract the last inactive element instead.
3106 (define_insn "@extract_<last_op>_<mode>"
3107   [(set (match_operand:<VEL> 0 "register_operand")
3108         (unspec:<VEL>
3109           [(match_operand:<VPRED> 1 "register_operand")
3110            (match_operand:SVE_FULL 2 "register_operand")]
3111           LAST))]
3112   "TARGET_SVE"
3113   {@ [ cons: =0 , 1   , 2  ]
3114      [ ?r       , Upl , w  ] last<ab>\t%<vwcore>0, %1, %2.<Vetype>
3115      [ w        , Upl , w  ] last<ab>\t%<Vetype>0, %1, %2.<Vetype>
3116   }
3117 )
3118
3119 ;; -------------------------------------------------------------------------
3120 ;; ---- [PRED] Extract index
3121 ;; -------------------------------------------------------------------------
3122 ;; The patterns in this section are synthetic.
3123 ;; -------------------------------------------------------------------------
3124
3125 ;; Handle extractions from a predicate by converting to an integer vector
3126 ;; and extracting from there.
3127 (define_expand "vec_extract<vpred><Vel>"
3128   [(match_operand:<VEL> 0 "register_operand")
3129    (match_operand:<VPRED> 1 "register_operand")
3130    (match_operand:SI 2 "nonmemory_operand")
3131    ;; Dummy operand to which we can attach the iterator.
3132    (reg:SVE_FULL_I V0_REGNUM)]
3133   "TARGET_SVE"
3134   {
3135     rtx tmp = gen_reg_rtx (<MODE>mode);
3136     emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
3137                                              CONST1_RTX (<MODE>mode),
3138                                              CONST0_RTX (<MODE>mode)));
3139     emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
3140     DONE;
3141   }
3142 )
3143
3144 ;; =========================================================================
3145 ;; == Unary arithmetic
3146 ;; =========================================================================
3147
3148 ;; -------------------------------------------------------------------------
3149 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
3150 ;; -------------------------------------------------------------------------
3151 ;; Includes:
3152 ;; - ABS
3153 ;; - CLS (= clrsb)
3154 ;; - CLZ
3155 ;; - CNT (= popcount)
3156 ;; - RBIT (= bitreverse)
3157 ;; - NEG
3158 ;; - NOT
3159 ;; -------------------------------------------------------------------------
3160
3161 (define_expand "ctz<mode>2"
3162   [(set (match_operand:SVE_I 0 "register_operand")
3163         (unspec:SVE_I
3164           [(match_dup 2)
3165            (ctz:SVE_I
3166              (match_operand:SVE_I 1 "register_operand"))]
3167           UNSPEC_PRED_X))]
3168   "TARGET_SVE"
3169   {
3170      rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3171      rtx temp = gen_reg_rtx (<MODE>mode);
3172      emit_insn (gen_aarch64_pred_rbit<mode> (temp, pred, operands[1]));
3173      emit_insn (gen_aarch64_pred_clz<mode> (operands[0], pred, temp));
3174      DONE;
3175   }
3176 )
3177
3178 ;; Unpredicated integer unary arithmetic.
3179 (define_expand "<optab><mode>2"
3180   [(set (match_operand:SVE_I 0 "register_operand")
3181         (unspec:SVE_I
3182           [(match_dup 2)
3183            (SVE_INT_UNARY:SVE_I
3184              (match_operand:SVE_I 1 "register_operand"))]
3185           UNSPEC_PRED_X))]
3186   "TARGET_SVE"
3187   {
3188     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3189   }
3190 )
3191
3192 ;; Integer unary arithmetic predicated with a PTRUE.
3193 (define_insn "@aarch64_pred_<optab><mode>"
3194   [(set (match_operand:SVE_VDQ_I 0 "register_operand")
3195         (unspec:SVE_VDQ_I
3196           [(match_operand:<VPRED> 1 "register_operand")
3197            (SVE_INT_UNARY:SVE_VDQ_I
3198              (match_operand:SVE_VDQ_I 2 "register_operand"))]
3199           UNSPEC_PRED_X))]
3200   "TARGET_SVE"
3201   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3202      [ w        , Upl , 0 ; *              ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
3203      [ ?&w      , Upl , w ; yes            ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
3204   }
3205 )
3206
3207 ;; Predicated integer unary arithmetic with merging.
3208 (define_expand "@cond_<optab><mode>"
3209   [(set (match_operand:SVE_I 0 "register_operand")
3210         (unspec:SVE_I
3211           [(match_operand:<VPRED> 1 "register_operand")
3212            (SVE_INT_UNARY:SVE_I
3213              (match_operand:SVE_I 2 "register_operand"))
3214            (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
3215           UNSPEC_SEL))]
3216   "TARGET_SVE"
3217 )
3218
3219 ;; Predicated integer unary arithmetic, merging with the first input.
3220 (define_insn "*cond_<optab><mode>_2"
3221   [(set (match_operand:SVE_I 0 "register_operand")
3222         (unspec:SVE_I
3223           [(match_operand:<VPRED> 1 "register_operand")
3224            (SVE_INT_UNARY:SVE_I
3225              (match_operand:SVE_I 2 "register_operand"))
3226            (match_dup 2)]
3227           UNSPEC_SEL))]
3228   "TARGET_SVE"
3229   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3230      [ w        , Upl , 0 ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3231      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3232   }
3233 )
3234
3235 ;; Predicated integer unary arithmetic, merging with an independent value.
3236 ;;
3237 ;; The earlyclobber isn't needed for the first alternative, but omitting
3238 ;; it would only help the case in which operands 2 and 3 are the same,
3239 ;; which is handled above rather than here.  Marking all the alternatives
3240 ;; as earlyclobber helps to make the instruction more regular to the
3241 ;; register allocator.
3242 (define_insn "*cond_<optab><mode>_any"
3243   [(set (match_operand:SVE_I 0 "register_operand")
3244         (unspec:SVE_I
3245           [(match_operand:<VPRED> 1 "register_operand")
3246            (SVE_INT_UNARY:SVE_I
3247              (match_operand:SVE_I 2 "register_operand"))
3248            (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
3249           UNSPEC_SEL))]
3250   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3251   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3252      [ &w       , Upl , w , 0  ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3253      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3254      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3255   }
3256 )
3257
3258
3259 ;; -------------------------------------------------------------------------
3260 ;; ---- [INT] General unary arithmetic corresponding to unspecs
3261 ;; -------------------------------------------------------------------------
3262 ;; Includes
3263 ;; - REVB
3264 ;; - REVH
3265 ;; - REVW
3266 ;; -------------------------------------------------------------------------
3267
3268 ;; Predicated integer unary operations.
3269 (define_insn "@aarch64_pred_<optab><mode>"
3270   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3271         (unspec:SVE_FULL_I
3272           [(match_operand:<VPRED> 1 "register_operand")
3273            (unspec:SVE_FULL_I
3274              [(match_operand:SVE_FULL_I 2 "register_operand")]
3275              SVE_INT_UNARY)]
3276           UNSPEC_PRED_X))]
3277   "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3278   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3279      [ w        , Upl , 0 ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3280      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3281   }
3282 )
3283
3284 ;; Another way of expressing the REVB, REVH and REVW patterns, with this
3285 ;; form being easier for permutes.  The predicate mode determines the number
3286 ;; of lanes and the data mode decides the granularity of the reversal within
3287 ;; each lane.
3288 (define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
3289   [(set (match_operand:SVE_ALL 0 "register_operand")
3290         (unspec:SVE_ALL
3291           [(match_operand:PRED_HSD 1 "register_operand")
3292            (unspec:SVE_ALL
3293              [(match_operand:SVE_ALL 2 "register_operand")]
3294              UNSPEC_REVBHW)]
3295           UNSPEC_PRED_X))]
3296   "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
3297   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3298      [ w        , Upl , 0 ; *              ] rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3299      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3300   }
3301 )
3302
3303 ;; Predicated integer unary operations with merging.
3304 (define_insn "@cond_<optab><mode>"
3305   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3306         (unspec:SVE_FULL_I
3307           [(match_operand:<VPRED> 1 "register_operand")
3308            (unspec:SVE_FULL_I
3309              [(match_operand:SVE_FULL_I 2 "register_operand")]
3310              SVE_INT_UNARY)
3311            (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3312           UNSPEC_SEL))]
3313   "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3314   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3315      [ w        , Upl , w , 0  ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3316      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3317      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3318   }
3319 )
3320
3321 ;; -------------------------------------------------------------------------
3322 ;; ---- [INT] Sign and zero extension
3323 ;; -------------------------------------------------------------------------
3324 ;; Includes:
3325 ;; - SXTB
3326 ;; - SXTH
3327 ;; - SXTW
3328 ;; - UXTB
3329 ;; - UXTH
3330 ;; - UXTW
3331 ;; -------------------------------------------------------------------------
3332
3333 ;; Unpredicated sign and zero extension from a narrower mode.
3334 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3335   [(set (match_operand:SVE_HSDI 0 "register_operand")
3336         (unspec:SVE_HSDI
3337           [(match_dup 2)
3338            (ANY_EXTEND:SVE_HSDI
3339              (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
3340           UNSPEC_PRED_X))]
3341   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3342   {
3343     operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
3344   }
3345 )
3346
3347 ;; Predicated sign and zero extension from a narrower mode.
3348 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3349   [(set (match_operand:SVE_HSDI 0 "register_operand")
3350         (unspec:SVE_HSDI
3351           [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand")
3352            (ANY_EXTEND:SVE_HSDI
3353              (match_operand:SVE_PARTIAL_I 2 "register_operand"))]
3354           UNSPEC_PRED_X))]
3355   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3356   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3357      [ w        , Upl , 0 ; *              ] <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3358      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3359   }
3360 )
3361
3362 ;; Predicated truncate-and-sign-extend operations.
3363 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3364   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
3365         (unspec:SVE_FULL_HSDI
3366           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
3367            (sign_extend:SVE_FULL_HSDI
3368              (truncate:SVE_PARTIAL_I
3369                (match_operand:SVE_FULL_HSDI 2 "register_operand")))]
3370           UNSPEC_PRED_X))]
3371   "TARGET_SVE
3372    && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3373   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3374      [ w        , Upl , 0 ; *              ] sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3375      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3376   }
3377 )
3378
3379 ;; Predicated truncate-and-sign-extend operations with merging.
3380 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3381   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
3382         (unspec:SVE_FULL_HSDI
3383           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
3384            (sign_extend:SVE_FULL_HSDI
3385              (truncate:SVE_PARTIAL_I
3386                (match_operand:SVE_FULL_HSDI 2 "register_operand")))
3387            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
3388           UNSPEC_SEL))]
3389   "TARGET_SVE
3390    && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3391   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3392      [ w        , Upl , w , 0  ; *              ] sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3393      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3394      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3395   }
3396 )
3397
3398 ;; Predicated truncate-and-zero-extend operations, merging with the
3399 ;; first input.
3400 ;;
3401 ;; The canonical form of this operation is an AND of a constant rather
3402 ;; than (zero_extend (truncate ...)).
3403 (define_insn "*cond_uxt<mode>_2"
3404   [(set (match_operand:SVE_I 0 "register_operand")
3405         (unspec:SVE_I
3406           [(match_operand:<VPRED> 1 "register_operand")
3407            (and:SVE_I
3408              (match_operand:SVE_I 2 "register_operand")
3409              (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3410            (match_dup 2)]
3411           UNSPEC_SEL))]
3412   "TARGET_SVE"
3413   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3414      [ w        , Upl , 0 ; *              ] uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3415      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3416   }
3417 )
3418
3419 ;; Predicated truncate-and-zero-extend operations, merging with an
3420 ;; independent value.
3421 ;;
3422 ;; The earlyclobber isn't needed for the first alternative, but omitting
3423 ;; it would only help the case in which operands 2 and 4 are the same,
3424 ;; which is handled above rather than here.  Marking all the alternatives
3425 ;; as early-clobber helps to make the instruction more regular to the
3426 ;; register allocator.
3427 (define_insn "*cond_uxt<mode>_any"
3428   [(set (match_operand:SVE_I 0 "register_operand")
3429         (unspec:SVE_I
3430           [(match_operand:<VPRED> 1 "register_operand")
3431            (and:SVE_I
3432              (match_operand:SVE_I 2 "register_operand")
3433              (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3434            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3435           UNSPEC_SEL))]
3436   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3437   {@ [ cons: =0 , 1   , 2 , 4  ; attrs: movprfx ]
3438      [ &w       , Upl , w , 0  ; *              ] uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3439      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3440      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3441   }
3442 )
3443
3444 ;; -------------------------------------------------------------------------
3445 ;; ---- [INT] Truncation
3446 ;; -------------------------------------------------------------------------
3447 ;; The patterns in this section are synthetic.
3448 ;; -------------------------------------------------------------------------
3449
3450 ;; Truncate to a partial SVE vector from either a full vector or a
3451 ;; wider partial vector.  This is a no-op, because we can just ignore
3452 ;; the unused upper bits of the source.
3453 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3454   [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3455         (truncate:SVE_PARTIAL_I
3456           (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3457   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3458   "#"
3459   "&& reload_completed"
3460   [(set (match_dup 0) (match_dup 1))]
3461   {
3462     operands[1] = aarch64_replace_reg_mode (operands[1],
3463                                             <SVE_PARTIAL_I:MODE>mode);
3464   }
3465 )
3466
3467 ;; -------------------------------------------------------------------------
3468 ;; ---- [INT] Logical inverse
3469 ;; -------------------------------------------------------------------------
3470 ;; Includes:
3471 ;; - CNOT
3472 ;; -------------------------------------------------------------------------
3473
3474 ;; Logical inverse, predicated with a ptrue.
3475 (define_expand "@aarch64_ptrue_cnot<mode>"
3476   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3477         (unspec:SVE_FULL_I
3478           [(unspec:<VPRED>
3479              [(match_operand:<VPRED> 1 "register_operand")
3480               (const_int SVE_KNOWN_PTRUE)
3481               (eq:<VPRED>
3482                 (match_operand:SVE_FULL_I 2 "register_operand")
3483                 (match_dup 3))]
3484              UNSPEC_PRED_Z)
3485            (match_dup 4)
3486            (match_dup 3)]
3487           UNSPEC_SEL))]
3488   "TARGET_SVE"
3489   {
3490     operands[3] = CONST0_RTX (<MODE>mode);
3491     operands[4] = CONST1_RTX (<MODE>mode);
3492   }
3493 )
3494
3495 (define_insn "*cnot<mode>"
3496   [(set (match_operand:SVE_I 0 "register_operand")
3497         (unspec:SVE_I
3498           [(unspec:<VPRED>
3499              [(match_operand:<VPRED> 1 "register_operand")
3500               (const_int SVE_KNOWN_PTRUE)
3501               (eq:<VPRED>
3502                 (match_operand:SVE_I 2 "register_operand")
3503                 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3504              UNSPEC_PRED_Z)
3505            (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3506            (match_dup 3)]
3507           UNSPEC_SEL))]
3508   "TARGET_SVE"
3509   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3510      [ w        , Upl , 0 ; *              ] cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3511      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3512   }
3513 )
3514
3515 ;; Predicated logical inverse with merging.
3516 (define_expand "@cond_cnot<mode>"
3517   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3518         (unspec:SVE_FULL_I
3519           [(match_operand:<VPRED> 1 "register_operand")
3520            (unspec:SVE_FULL_I
3521              [(unspec:<VPRED>
3522                 [(match_dup 4)
3523                  (const_int SVE_KNOWN_PTRUE)
3524                  (eq:<VPRED>
3525                    (match_operand:SVE_FULL_I 2 "register_operand")
3526                    (match_dup 5))]
3527                 UNSPEC_PRED_Z)
3528               (match_dup 6)
3529               (match_dup 5)]
3530              UNSPEC_SEL)
3531            (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3532           UNSPEC_SEL))]
3533   "TARGET_SVE"
3534   {
3535     operands[4] = CONSTM1_RTX (<VPRED>mode);
3536     operands[5] = CONST0_RTX (<MODE>mode);
3537     operands[6] = CONST1_RTX (<MODE>mode);
3538   }
3539 )
3540
3541 ;; Predicated logical inverse, merging with the first input.
3542 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3543   [(set (match_operand:SVE_I 0 "register_operand")
3544         (unspec:SVE_I
3545           [(match_operand:<VPRED> 1 "register_operand")
3546            ;; Logical inverse of operand 2 (as above).
3547            (unspec:SVE_I
3548              [(unspec:<VPRED>
3549                 [(match_operand 5)
3550                  (const_int SVE_KNOWN_PTRUE)
3551                  (eq:<VPRED>
3552                    (match_operand:SVE_I 2 "register_operand")
3553                    (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3554                 UNSPEC_PRED_Z)
3555               (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3556               (match_dup 3)]
3557              UNSPEC_SEL)
3558            (match_dup 2)]
3559           UNSPEC_SEL))]
3560   "TARGET_SVE"
3561   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3562      [ w        , Upl , 0 ; *              ] cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3563      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3564   }
3565   "&& !CONSTANT_P (operands[5])"
3566   {
3567     operands[5] = CONSTM1_RTX (<VPRED>mode);
3568   }
3569 )
3570
3571 ;; Predicated logical inverse, merging with an independent value.
3572 ;;
3573 ;; The earlyclobber isn't needed for the first alternative, but omitting
3574 ;; it would only help the case in which operands 2 and 6 are the same,
3575 ;; which is handled above rather than here.  Marking all the alternatives
3576 ;; as earlyclobber helps to make the instruction more regular to the
3577 ;; register allocator.
3578 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3579   [(set (match_operand:SVE_I 0 "register_operand")
3580         (unspec:SVE_I
3581           [(match_operand:<VPRED> 1 "register_operand")
3582            ;; Logical inverse of operand 2 (as above).
3583            (unspec:SVE_I
3584              [(unspec:<VPRED>
3585                 [(match_operand 5)
3586                  (const_int SVE_KNOWN_PTRUE)
3587                  (eq:<VPRED>
3588                    (match_operand:SVE_I 2 "register_operand")
3589                    (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3590                 UNSPEC_PRED_Z)
3591               (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3592               (match_dup 3)]
3593              UNSPEC_SEL)
3594            (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero")]
3595           UNSPEC_SEL))]
3596   "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3597   {@ [ cons: =0 , 1   , 2 , 6  ; attrs: movprfx ]
3598      [ &w       , Upl , w , 0  ; *              ] cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3599      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3600      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3601   }
3602   "&& !CONSTANT_P (operands[5])"
3603   {
3604     operands[5] = CONSTM1_RTX (<VPRED>mode);
3605   }
3606 )
3607
3608 ;; -------------------------------------------------------------------------
3609 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3610 ;; -------------------------------------------------------------------------
3611 ;; Includes:
3612 ;; - FEXPA
3613 ;; -------------------------------------------------------------------------
3614
3615 ;; Unpredicated unary operations that take an integer and return a float.
3616 (define_insn "@aarch64_sve_<optab><mode>"
3617   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3618         (unspec:SVE_FULL_F
3619           [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3620           SVE_FP_UNARY_INT))]
3621   "TARGET_SVE"
3622   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3623 )
3624
3625 ;; -------------------------------------------------------------------------
3626 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3627 ;; -------------------------------------------------------------------------
3628 ;; Includes:
3629 ;; - FABS
3630 ;; - FNEG
3631 ;; - FRECPE
3632 ;; - FRECPX
3633 ;; - FRINTA
3634 ;; - FRINTI
3635 ;; - FRINTM
3636 ;; - FRINTN
3637 ;; - FRINTP
3638 ;; - FRINTX
3639 ;; - FRINTZ
3640 ;; - FRSQRTE
3641 ;; - FSQRT
3642 ;; -------------------------------------------------------------------------
3643
3644 ;; Unpredicated floating-point unary operations.
3645 (define_insn "@aarch64_sve_<optab><mode>"
3646   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3647         (unspec:SVE_FULL_F
3648           [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3649           SVE_FP_UNARY))]
3650   "TARGET_SVE"
3651   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3652 )
3653
3654 ;; Unpredicated floating-point unary operations.
3655 (define_expand "<optab><mode>2"
3656   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3657         (unspec:SVE_FULL_F
3658           [(match_dup 2)
3659            (const_int SVE_RELAXED_GP)
3660            (match_operand:SVE_FULL_F 1 "register_operand")]
3661           SVE_COND_FP_UNARY_OPTAB))]
3662   "TARGET_SVE"
3663   {
3664     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3665   }
3666 )
3667
3668 ;; Predicated floating-point unary operations.
3669 (define_insn "@aarch64_pred_<optab><mode>"
3670   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3671         (unspec:SVE_FULL_F
3672           [(match_operand:<VPRED> 1 "register_operand")
3673            (match_operand:SI 3 "aarch64_sve_gp_strictness")
3674            (match_operand:SVE_FULL_F 2 "register_operand")]
3675           SVE_COND_FP_UNARY))]
3676   "TARGET_SVE"
3677   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3678      [ w        , Upl , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3679      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3680   }
3681 )
3682
3683 ;; Predicated floating-point unary arithmetic with merging.
3684 (define_expand "@cond_<optab><mode>"
3685   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3686         (unspec:SVE_FULL_F
3687           [(match_operand:<VPRED> 1 "register_operand")
3688            (unspec:SVE_FULL_F
3689              [(match_dup 1)
3690               (const_int SVE_STRICT_GP)
3691               (match_operand:SVE_FULL_F 2 "register_operand")]
3692              SVE_COND_FP_UNARY)
3693            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3694           UNSPEC_SEL))]
3695   "TARGET_SVE"
3696 )
3697
3698 ;; Predicated floating-point unary arithmetic, merging with the first input.
3699 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
3700   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3701         (unspec:SVE_FULL_F
3702           [(match_operand:<VPRED> 1 "register_operand")
3703            (unspec:SVE_FULL_F
3704              [(match_operand 3)
3705               (const_int SVE_RELAXED_GP)
3706               (match_operand:SVE_FULL_F 2 "register_operand")]
3707              SVE_COND_FP_UNARY)
3708            (match_dup 2)]
3709           UNSPEC_SEL))]
3710   "TARGET_SVE"
3711   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3712      [ w        , Upl , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3713      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3714   }
3715   "&& !rtx_equal_p (operands[1], operands[3])"
3716   {
3717     operands[3] = copy_rtx (operands[1]);
3718   }
3719 )
3720
3721 (define_insn "*cond_<optab><mode>_2_strict"
3722   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3723         (unspec:SVE_FULL_F
3724           [(match_operand:<VPRED> 1 "register_operand")
3725            (unspec:SVE_FULL_F
3726              [(match_dup 1)
3727               (const_int SVE_STRICT_GP)
3728               (match_operand:SVE_FULL_F 2 "register_operand")]
3729              SVE_COND_FP_UNARY)
3730            (match_dup 2)]
3731           UNSPEC_SEL))]
3732   "TARGET_SVE"
3733   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
3734      [ w        , Upl , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3735      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3736   }
3737 )
3738
3739 ;; Predicated floating-point unary arithmetic, merging with an independent
3740 ;; value.
3741 ;;
3742 ;; The earlyclobber isn't needed for the first alternative, but omitting
3743 ;; it would only help the case in which operands 2 and 3 are the same,
3744 ;; which is handled above rather than here.  Marking all the alternatives
3745 ;; as earlyclobber helps to make the instruction more regular to the
3746 ;; register allocator.
3747 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
3748   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3749         (unspec:SVE_FULL_F
3750           [(match_operand:<VPRED> 1 "register_operand")
3751            (unspec:SVE_FULL_F
3752              [(match_operand 4)
3753               (const_int SVE_RELAXED_GP)
3754               (match_operand:SVE_FULL_F 2 "register_operand")]
3755              SVE_COND_FP_UNARY)
3756            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3757           UNSPEC_SEL))]
3758   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3759   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3760      [ &w       , Upl , w , 0  ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3761      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3762      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3763   }
3764   "&& !rtx_equal_p (operands[1], operands[4])"
3765   {
3766     operands[4] = copy_rtx (operands[1]);
3767   }
3768 )
3769
3770 (define_insn "*cond_<optab><mode>_any_strict"
3771   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3772         (unspec:SVE_FULL_F
3773           [(match_operand:<VPRED> 1 "register_operand")
3774            (unspec:SVE_FULL_F
3775              [(match_dup 1)
3776               (const_int SVE_STRICT_GP)
3777               (match_operand:SVE_FULL_F 2 "register_operand")]
3778              SVE_COND_FP_UNARY)
3779            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3780           UNSPEC_SEL))]
3781   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3782   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
3783      [ &w       , Upl , w , 0  ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3784      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3785      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3786   }
3787 )
3788
3789 ;; -------------------------------------------------------------------------
3790 ;; ---- [FP] Square root
3791 ;; -------------------------------------------------------------------------
3792
3793 (define_expand "sqrt<mode>2"
3794   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3795         (unspec:SVE_FULL_F
3796           [(match_dup 2)
3797            (const_int SVE_RELAXED_GP)
3798            (match_operand:SVE_FULL_F 1 "register_operand")]
3799           UNSPEC_COND_FSQRT))]
3800   "TARGET_SVE"
3801 {
3802   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3803     DONE;
3804   operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3805 })
3806
3807 ;; -------------------------------------------------------------------------
3808 ;; ---- [FP] Reciprocal square root
3809 ;; -------------------------------------------------------------------------
3810
3811 (define_expand "rsqrt<mode>2"
3812   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3813         (unspec:SVE_FULL_SDF
3814           [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3815           UNSPEC_RSQRT))]
3816   "TARGET_SVE"
3817 {
3818   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
3819   DONE;
3820 })
3821
3822 (define_expand "@aarch64_rsqrte<mode>"
3823   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3824         (unspec:SVE_FULL_SDF
3825           [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3826           UNSPEC_RSQRTE))]
3827   "TARGET_SVE"
3828 )
3829
3830 (define_expand "@aarch64_rsqrts<mode>"
3831   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3832         (unspec:SVE_FULL_SDF
3833           [(match_operand:SVE_FULL_SDF 1 "register_operand")
3834            (match_operand:SVE_FULL_SDF 2 "register_operand")]
3835           UNSPEC_RSQRTS))]
3836   "TARGET_SVE"
3837 )
3838
3839 ;; -------------------------------------------------------------------------
3840 ;; ---- [PRED] Inverse
3841 ;; -------------------------------------------------------------------------
3842 ;; Includes:
3843 ;; - NOT
3844 ;; -------------------------------------------------------------------------
3845
3846 ;; Unpredicated predicate inverse.
3847 (define_expand "one_cmpl<mode>2"
3848   [(set (match_operand:PRED_ALL 0 "register_operand")
3849         (and:PRED_ALL
3850           (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3851           (match_dup 2)))]
3852   "TARGET_SVE"
3853   {
3854     operands[2] = aarch64_ptrue_reg (<MODE>mode);
3855   }
3856 )
3857
3858 ;; Predicated predicate inverse.
3859 (define_insn "*one_cmpl<mode>3"
3860   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3861         (and:PRED_ALL
3862           (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3863           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3864   "TARGET_SVE"
3865   "not\t%0.b, %1/z, %2.b"
3866 )
3867
3868 ;; =========================================================================
3869 ;; == Binary arithmetic
3870 ;; =========================================================================
3871
3872 ;; -------------------------------------------------------------------------
3873 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3874 ;; -------------------------------------------------------------------------
3875 ;; Includes:
3876 ;; - ADD    (merging form only)
3877 ;; - AND    (merging form only)
3878 ;; - ASR    (merging form only)
3879 ;; - EOR    (merging form only)
3880 ;; - LSL    (merging form only)
3881 ;; - LSR    (merging form only)
3882 ;; - MUL
3883 ;; - ORR    (merging form only)
3884 ;; - SMAX
3885 ;; - SMIN
3886 ;; - SQADD  (SVE2 merging form only)
3887 ;; - SQSUB  (SVE2 merging form only)
3888 ;; - SUB    (merging form only)
3889 ;; - UMAX
3890 ;; - UMIN
3891 ;; - UQADD  (SVE2 merging form only)
3892 ;; - UQSUB  (SVE2 merging form only)
3893 ;; -------------------------------------------------------------------------
3894
3895 ;; Unpredicated integer binary operations that have an immediate form.
3896 (define_expand "<optab><mode>3"
3897   [(set (match_operand:SVE_I 0 "register_operand")
3898         (unspec:SVE_I
3899           [(match_dup 3)
3900            (SVE_INT_BINARY_MULTI:SVE_I
3901              (match_operand:SVE_I 1 "register_operand")
3902              (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3903           UNSPEC_PRED_X))]
3904   "TARGET_SVE"
3905   {
3906     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3907   }
3908 )
3909
3910 ;; Unpredicated integer binary operations that have an immediate form.
3911 ;; Advanced SIMD does not support vector DImode MUL, but SVE does.
3912 ;; Make use of the overlap between Z and V registers to implement the V2DI
3913 ;; optab for TARGET_SVE.  The mulvnx2di3 expander can
3914 ;; handle the TARGET_SVE2 case transparently.
3915 (define_expand "mul<mode>3"
3916   [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
3917         (unspec:SVE_I_SIMD_DI
3918           [(match_dup 3)
3919            (mult:SVE_I_SIMD_DI
3920              (match_operand:SVE_I_SIMD_DI 1 "register_operand")
3921              (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand"))]
3922           UNSPEC_PRED_X))]
3923   "TARGET_SVE"
3924   {
3925     /* SVE2 supports the MUL (vectors, unpredicated) form.  Emit the simple
3926        pattern for it here rather than splitting off the MULT expander
3927        separately.  */
3928     if (TARGET_SVE2)
3929       {
3930         emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
3931                                                    operands[1], operands[2]));
3932         DONE;
3933       }
3934     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3935   }
3936 )
3937
3938 ;; Integer binary operations that have an immediate form, predicated
3939 ;; with a PTRUE.  We don't actually need the predicate for the first
3940 ;; and third alternatives, but using Upa or X isn't likely to gain much
3941 ;; and would make the instruction seem less uniform to the register
3942 ;; allocator.
3943 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3944   [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
3945         (unspec:SVE_I_SIMD_DI
3946           [(match_operand:<VPRED> 1 "register_operand")
3947            (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
3948              (match_operand:SVE_I_SIMD_DI 2 "register_operand")
3949              (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
3950           UNSPEC_PRED_X))]
3951   "TARGET_SVE"
3952   {@ [ cons: =0 , 1   , 2  , 3             ; attrs: movprfx ]
3953      [ w        , Upl , %0 , <sve_imm_con> ; *              ] #
3954      [ w        , Upl , 0  , w             ; *              ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
3955      [ ?&w      , Upl , w  , <sve_imm_con> ; yes            ] #
3956      [ ?&w      , Upl , w  , w             ; yes            ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
3957   }
3958   ; Split the unpredicated form after reload, so that we don't have
3959   ; the unnecessary PTRUE.
3960   "&& reload_completed
3961    && !register_operand (operands[3], <MODE>mode)"
3962   [(set (match_dup 0)
3963         (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI (match_dup 2) (match_dup 3)))]
3964   ""
3965 )
3966
3967 ;; Unpredicated binary operations with a constant (post-RA only).
3968 ;; These are generated by splitting a predicated instruction whose
3969 ;; predicate is unused.
3970 (define_insn "*post_ra_<optab><mode>3"
3971   [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand" "=w, ?&w")
3972         (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
3973           (match_operand:SVE_I_SIMD_DI 1 "register_operand" "0, w")
3974           (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3975   "TARGET_SVE && reload_completed"
3976   "@
3977    <sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2
3978    movprfx\t%Z0, %Z1\;<sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2"
3979   [(set_attr "movprfx" "*,yes")]
3980 )
3981
3982 ;; Predicated integer operations with merging.
3983 (define_expand "@cond_<optab><mode>"
3984   [(set (match_operand:SVE_I 0 "register_operand")
3985         (unspec:SVE_I
3986           [(match_operand:<VPRED> 1 "register_operand")
3987            (SVE_INT_BINARY:SVE_I
3988              (match_operand:SVE_I 2 "register_operand")
3989              (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
3990            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3991           UNSPEC_SEL))]
3992   "TARGET_SVE"
3993 )
3994
3995 ;; Predicated integer operations, merging with the first input.
3996 (define_insn "*cond_<optab><mode>_2"
3997   [(set (match_operand:SVE_I 0 "register_operand")
3998         (unspec:SVE_I
3999           [(match_operand:<VPRED> 1 "register_operand")
4000            (SVE_INT_BINARY:SVE_I
4001              (match_operand:SVE_I 2 "register_operand")
4002              (match_operand:SVE_I 3 "register_operand"))
4003            (match_dup 2)]
4004           UNSPEC_SEL))]
4005   "TARGET_SVE"
4006   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4007      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4008      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4009   }
4010 )
4011
4012 ;; Predicated integer operations, merging with the second input.
4013 (define_insn "*cond_<optab><mode>_3"
4014   [(set (match_operand:SVE_I 0 "register_operand")
4015         (unspec:SVE_I
4016           [(match_operand:<VPRED> 1 "register_operand")
4017            (SVE_INT_BINARY:SVE_I
4018              (match_operand:SVE_I 2 "register_operand")
4019              (match_operand:SVE_I 3 "register_operand"))
4020            (match_dup 3)]
4021           UNSPEC_SEL))]
4022   "TARGET_SVE"
4023   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4024      [ w        , Upl , w , 0 ; *              ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4025      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4026   }
4027 )
4028
4029 ;; Predicated integer operations, merging with an independent value.
4030 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4031   [(set (match_operand:SVE_I 0 "register_operand")
4032         (unspec:SVE_I
4033           [(match_operand:<VPRED> 1 "register_operand")
4034            (SVE_INT_BINARY:SVE_I
4035              (match_operand:SVE_I 2 "register_operand")
4036              (match_operand:SVE_I 3 "register_operand"))
4037            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4038           UNSPEC_SEL))]
4039   "TARGET_SVE
4040    && !rtx_equal_p (operands[2], operands[4])
4041    && !rtx_equal_p (operands[3], operands[4])"
4042   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
4043      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4044      [ &w       , Upl , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4045      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4046      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4047      [ ?&w      , Upl , w , w , w   ] #
4048   }
4049   "&& reload_completed
4050    && register_operand (operands[4], <MODE>mode)
4051    && !rtx_equal_p (operands[0], operands[4])"
4052   {
4053     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4054                                              operands[4], operands[1]));
4055     operands[4] = operands[2] = operands[0];
4056   }
4057   [(set_attr "movprfx" "yes")]
4058 )
4059
4060 ;; -------------------------------------------------------------------------
4061 ;; ---- [INT] Addition
4062 ;; -------------------------------------------------------------------------
4063 ;; Includes:
4064 ;; - ADD
4065 ;; - DECB
4066 ;; - DECD
4067 ;; - DECH
4068 ;; - DECW
4069 ;; - INCB
4070 ;; - INCD
4071 ;; - INCH
4072 ;; - INCW
4073 ;; - SUB
4074 ;; -------------------------------------------------------------------------
4075
4076 (define_insn "add<mode>3"
4077   [(set (match_operand:SVE_I 0 "register_operand")
4078         (plus:SVE_I
4079           (match_operand:SVE_I 1 "register_operand")
4080           (match_operand:SVE_I 2 "aarch64_sve_add_operand")))]
4081   "TARGET_SVE"
4082   {@ [ cons: =0 , 1  , 2   ; attrs: movprfx ]
4083      [ w        , %0 , vsa ; *              ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
4084      [ w        , 0  , vsn ; *              ] sub\t%0.<Vetype>, %0.<Vetype>, #%N2
4085      [ w        , 0  , vsi ; *              ] << aarch64_output_sve_vector_inc_dec ("%0.<Vetype>", operands[2]);
4086      [ ?w       , w  , vsa ; yes            ] movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
4087      [ ?w       , w  , vsn ; yes            ] movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
4088      [ w        , w  , w   ; *              ] add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4089   }
4090 )
4091
4092 ;; Merging forms are handled through SVE_INT_BINARY.
4093
4094 ;; -------------------------------------------------------------------------
4095 ;; ---- [INT] Subtraction
4096 ;; -------------------------------------------------------------------------
4097 ;; Includes:
4098 ;; - SUB
4099 ;; - SUBR
4100 ;; -------------------------------------------------------------------------
4101
4102 (define_insn "sub<mode>3"
4103   [(set (match_operand:SVE_I 0 "register_operand")
4104         (minus:SVE_I
4105           (match_operand:SVE_I 1 "aarch64_sve_arith_operand")
4106           (match_operand:SVE_I 2 "register_operand")))]
4107   "TARGET_SVE"
4108   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
4109      [ w        , w   , w ; *              ] sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4110      [ w        , vsa , 0 ; *              ] subr\t%0.<Vetype>, %0.<Vetype>, #%D1
4111      [ ?&w      , vsa , w ; yes            ] movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1
4112   }
4113 )
4114
4115 ;; Merging forms are handled through SVE_INT_BINARY.
4116
4117 ;; -------------------------------------------------------------------------
4118 ;; ---- [INT] Take address
4119 ;; -------------------------------------------------------------------------
4120 ;; Includes:
4121 ;; - ADR
4122 ;; -------------------------------------------------------------------------
4123
4124 ;; An unshifted and unscaled ADR.  This is functionally equivalent to an ADD,
4125 ;; but the svadrb intrinsics should preserve the user's choice.
4126 (define_insn "@aarch64_adr<mode>"
4127   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
4128         (unspec:SVE_FULL_SDI
4129           [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
4130            (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
4131           UNSPEC_ADR))]
4132   "TARGET_SVE && TARGET_NON_STREAMING"
4133   "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
4134 )
4135
4136 ;; Same, but with the offset being sign-extended from the low 32 bits.
4137 (define_insn_and_rewrite "*aarch64_adr_sxtw"
4138   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4139         (unspec:VNx2DI
4140           [(match_operand:VNx2DI 1 "register_operand" "w")
4141            (unspec:VNx2DI
4142              [(match_operand 3)
4143               (sign_extend:VNx2DI
4144                 (truncate:VNx2SI
4145                   (match_operand:VNx2DI 2 "register_operand" "w")))]
4146              UNSPEC_PRED_X)]
4147           UNSPEC_ADR))]
4148   "TARGET_SVE && TARGET_NON_STREAMING"
4149   "adr\t%0.d, [%1.d, %2.d, sxtw]"
4150   "&& !CONSTANT_P (operands[3])"
4151   {
4152     operands[3] = CONSTM1_RTX (VNx2BImode);
4153   }
4154 )
4155
4156 ;; Same, but with the offset being zero-extended from the low 32 bits.
4157 (define_insn "*aarch64_adr_uxtw_unspec"
4158   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4159         (unspec:VNx2DI
4160           [(match_operand:VNx2DI 1 "register_operand" "w")
4161            (and:VNx2DI
4162              (match_operand:VNx2DI 2 "register_operand" "w")
4163              (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
4164           UNSPEC_ADR))]
4165   "TARGET_SVE && TARGET_NON_STREAMING"
4166   "adr\t%0.d, [%1.d, %2.d, uxtw]"
4167 )
4168
4169 ;; Same, matching as a PLUS rather than unspec.
4170 (define_insn "*aarch64_adr_uxtw_and"
4171   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4172         (plus:VNx2DI
4173           (and:VNx2DI
4174             (match_operand:VNx2DI 2 "register_operand" "w")
4175             (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
4176           (match_operand:VNx2DI 1 "register_operand" "w")))]
4177   "TARGET_SVE && TARGET_NON_STREAMING"
4178   "adr\t%0.d, [%1.d, %2.d, uxtw]"
4179 )
4180
4181 ;; ADR with a nonzero shift.
4182 (define_expand "@aarch64_adr<mode>_shift"
4183   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4184         (plus:SVE_FULL_SDI
4185           (unspec:SVE_FULL_SDI
4186             [(match_dup 4)
4187              (ashift:SVE_FULL_SDI
4188                (match_operand:SVE_FULL_SDI 2 "register_operand")
4189                (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
4190             UNSPEC_PRED_X)
4191           (match_operand:SVE_FULL_SDI 1 "register_operand")))]
4192   "TARGET_SVE && TARGET_NON_STREAMING"
4193   {
4194     operands[4] = CONSTM1_RTX (<VPRED>mode);
4195   }
4196 )
4197
4198 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
4199   [(set (match_operand:SVE_24I 0 "register_operand" "=w")
4200         (plus:SVE_24I
4201           (unspec:SVE_24I
4202             [(match_operand 4)
4203              (ashift:SVE_24I
4204                (match_operand:SVE_24I 2 "register_operand" "w")
4205                (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
4206             UNSPEC_PRED_X)
4207           (match_operand:SVE_24I 1 "register_operand" "w")))]
4208   "TARGET_SVE && TARGET_NON_STREAMING"
4209   "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
4210   "&& !CONSTANT_P (operands[4])"
4211   {
4212     operands[4] = CONSTM1_RTX (<VPRED>mode);
4213   }
4214 )
4215
4216 ;; Same, but with the index being sign-extended from the low 32 bits.
4217 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
4218   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4219         (plus:VNx2DI
4220           (unspec:VNx2DI
4221             [(match_operand 4)
4222              (ashift:VNx2DI
4223                (unspec:VNx2DI
4224                  [(match_operand 5)
4225                   (sign_extend:VNx2DI
4226                     (truncate:VNx2SI
4227                       (match_operand:VNx2DI 2 "register_operand" "w")))]
4228                  UNSPEC_PRED_X)
4229                (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
4230             UNSPEC_PRED_X)
4231           (match_operand:VNx2DI 1 "register_operand" "w")))]
4232   "TARGET_SVE && TARGET_NON_STREAMING"
4233   "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
4234   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4235   {
4236     operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
4237   }
4238 )
4239
4240 ;; Same, but with the index being zero-extended from the low 32 bits.
4241 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
4242   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4243         (plus:VNx2DI
4244           (unspec:VNx2DI
4245             [(match_operand 5)
4246              (ashift:VNx2DI
4247                (and:VNx2DI
4248                  (match_operand:VNx2DI 2 "register_operand" "w")
4249                  (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
4250                (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
4251             UNSPEC_PRED_X)
4252           (match_operand:VNx2DI 1 "register_operand" "w")))]
4253   "TARGET_SVE && TARGET_NON_STREAMING"
4254   "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
4255   "&& !CONSTANT_P (operands[5])"
4256   {
4257     operands[5] = CONSTM1_RTX (VNx2BImode);
4258   }
4259 )
4260
4261 ;; -------------------------------------------------------------------------
4262 ;; ---- [INT] Absolute difference
4263 ;; -------------------------------------------------------------------------
4264 ;; Includes:
4265 ;; - SABD
4266 ;; - UABD
4267 ;; -------------------------------------------------------------------------
4268
4269 ;; Unpredicated integer absolute difference.
4270 (define_expand "<su>abd<mode>3"
4271   [(use (match_operand:SVE_I 0 "register_operand"))
4272    (USMAX:SVE_I
4273      (match_operand:SVE_I 1 "register_operand")
4274      (match_operand:SVE_I 2 "register_operand"))]
4275   "TARGET_SVE"
4276   {
4277     rtx pred = aarch64_ptrue_reg (<VPRED>mode);
4278     emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
4279                                                operands[2]));
4280     DONE;
4281   }
4282 )
4283
4284 ;; Predicated integer absolute difference.
4285 (define_insn "@aarch64_pred_<su>abd<mode>"
4286   [(set (match_operand:SVE_I 0 "register_operand")
4287         (minus:SVE_I
4288           (unspec:SVE_I
4289             [(match_operand:<VPRED> 1 "register_operand")
4290              (USMAX:SVE_I
4291                (match_operand:SVE_I 2 "register_operand")
4292                (match_operand:SVE_I 3 "register_operand"))]
4293             UNSPEC_PRED_X)
4294           (unspec:SVE_I
4295             [(match_dup 1)
4296              (<max_opp>:SVE_I
4297                (match_dup 2)
4298                (match_dup 3))]
4299             UNSPEC_PRED_X)))]
4300   "TARGET_SVE"
4301   {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
4302      [ w        , Upl , %0 , w ; *              ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4303      [ ?&w      , Upl , w  , w ; yes            ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4304   }
4305 )
4306
4307 (define_expand "@aarch64_cond_<su>abd<mode>"
4308   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4309         (unspec:SVE_FULL_I
4310           [(match_operand:<VPRED> 1 "register_operand")
4311            (minus:SVE_FULL_I
4312              (unspec:SVE_FULL_I
4313                [(match_dup 1)
4314                 (USMAX:SVE_FULL_I
4315                   (match_operand:SVE_FULL_I 2 "register_operand")
4316                   (match_operand:SVE_FULL_I 3 "register_operand"))]
4317                UNSPEC_PRED_X)
4318              (unspec:SVE_FULL_I
4319                [(match_dup 1)
4320                 (<max_opp>:SVE_FULL_I
4321                   (match_dup 2)
4322                   (match_dup 3))]
4323                UNSPEC_PRED_X))
4324            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4325           UNSPEC_SEL))]
4326   "TARGET_SVE"
4327 {
4328   if (rtx_equal_p (operands[3], operands[4]))
4329     std::swap (operands[2], operands[3]);
4330 })
4331
4332 ;; Predicated integer absolute difference, merging with the first input.
4333 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
4334   [(set (match_operand:SVE_I 0 "register_operand")
4335         (unspec:SVE_I
4336           [(match_operand:<VPRED> 1 "register_operand")
4337            (minus:SVE_I
4338              (unspec:SVE_I
4339                [(match_operand 4)
4340                 (USMAX:SVE_I
4341                   (match_operand:SVE_I 2 "register_operand")
4342                   (match_operand:SVE_I 3 "register_operand"))]
4343                UNSPEC_PRED_X)
4344              (unspec:SVE_I
4345                [(match_operand 5)
4346                 (<max_opp>:SVE_I
4347                   (match_dup 2)
4348                   (match_dup 3))]
4349                UNSPEC_PRED_X))
4350            (match_dup 2)]
4351           UNSPEC_SEL))]
4352   "TARGET_SVE"
4353   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4354      [ w        , Upl , 0 , w ; *              ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4355      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4356   }
4357   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4358   {
4359     operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4360   }
4361 )
4362
4363 ;; Predicated integer absolute difference, merging with the second input.
4364 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3"
4365   [(set (match_operand:SVE_I 0 "register_operand")
4366         (unspec:SVE_I
4367           [(match_operand:<VPRED> 1 "register_operand")
4368            (minus:SVE_I
4369              (unspec:SVE_I
4370                [(match_operand 4)
4371                 (USMAX:SVE_I
4372                   (match_operand:SVE_I 2 "register_operand")
4373                   (match_operand:SVE_I 3 "register_operand"))]
4374                UNSPEC_PRED_X)
4375              (unspec:SVE_I
4376                [(match_operand 5)
4377                 (<max_opp>:SVE_I
4378                   (match_dup 2)
4379                   (match_dup 3))]
4380                UNSPEC_PRED_X))
4381            (match_dup 3)]
4382           UNSPEC_SEL))]
4383   "TARGET_SVE"
4384   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4385      [ w        , Upl , w , 0 ; *              ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4386      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4387   }
4388   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4389   {
4390     operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4391   }
4392 )
4393
4394 ;; Predicated integer absolute difference, merging with an independent value.
4395 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
4396   [(set (match_operand:SVE_I 0 "register_operand")
4397         (unspec:SVE_I
4398           [(match_operand:<VPRED> 1 "register_operand")
4399            (minus:SVE_I
4400              (unspec:SVE_I
4401                [(match_operand 5)
4402                 (USMAX:SVE_I
4403                   (match_operand:SVE_I 2 "register_operand")
4404                   (match_operand:SVE_I 3 "register_operand"))]
4405                UNSPEC_PRED_X)
4406              (unspec:SVE_I
4407                [(match_operand 6)
4408                 (<max_opp>:SVE_I
4409                   (match_dup 2)
4410                   (match_dup 3))]
4411                UNSPEC_PRED_X))
4412            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4413           UNSPEC_SEL))]
4414   "TARGET_SVE
4415    && !rtx_equal_p (operands[2], operands[4])
4416    && !rtx_equal_p (operands[3], operands[4])"
4417   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
4418      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4419      [ &w       , Upl , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4420      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4421      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4422      [ ?&w      , Upl , w , w , w   ] #
4423   }
4424   "&& 1"
4425   {
4426     if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
4427       operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
4428     else if (reload_completed
4429              && register_operand (operands[4], <MODE>mode)
4430              && !rtx_equal_p (operands[0], operands[4]))
4431       {
4432         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4433                                                  operands[4], operands[1]));
4434         operands[4] = operands[2] = operands[0];
4435       }
4436     else
4437       FAIL;
4438   }
4439   [(set_attr "movprfx" "yes")]
4440 )
4441
4442 ;; -------------------------------------------------------------------------
4443 ;; ---- [INT] Saturating addition and subtraction
4444 ;; -------------------------------------------------------------------------
4445 ;; - SQADD
4446 ;; - SQSUB
4447 ;; - UQADD
4448 ;; - UQSUB
4449 ;; -------------------------------------------------------------------------
4450
4451 ;; Unpredicated saturating signed addition and subtraction.
4452 (define_insn "<su_optab>s<addsub><mode>3"
4453   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4454         (SBINQOPS:SVE_FULL_I
4455           (match_operand:SVE_FULL_I 1 "register_operand")
4456           (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand")))]
4457   "TARGET_SVE"
4458   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
4459      [ w        , 0 , vsQ ; *              ] <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4460      [ w        , 0 , vsS ; *              ] <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4461      [ ?&w      , w , vsQ ; yes            ] movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4462      [ ?&w      , w , vsS ; yes            ] movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4463      [ w        , w , w   ; *              ] <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4464   }
4465 )
4466
4467 ;; Unpredicated saturating unsigned addition and subtraction.
4468 (define_insn "<su_optab>s<addsub><mode>3"
4469   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4470         (UBINQOPS:SVE_FULL_I
4471           (match_operand:SVE_FULL_I 1 "register_operand")
4472           (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand")))]
4473   "TARGET_SVE"
4474   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
4475      [ w        , 0 , vsa ; *              ] <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4476      [ ?&w      , w , vsa ; yes            ] movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4477      [ w        , w , w   ; *              ] <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4478   }
4479 )
4480
4481 ;; -------------------------------------------------------------------------
4482 ;; ---- [INT] Highpart multiplication
4483 ;; -------------------------------------------------------------------------
4484 ;; Includes:
4485 ;; - SMULH
4486 ;; - UMULH
4487 ;; -------------------------------------------------------------------------
4488
4489 ;; Unpredicated highpart multiplication.
4490 (define_expand "<su>mul<mode>3_highpart"
4491   [(set (match_operand:SVE_I 0 "register_operand")
4492         (unspec:SVE_I
4493           [(match_dup 3)
4494            (unspec:SVE_I
4495              [(match_operand:SVE_I 1 "register_operand")
4496               (match_operand:SVE_I 2 "register_operand")]
4497              MUL_HIGHPART)]
4498           UNSPEC_PRED_X))]
4499   "TARGET_SVE"
4500   {
4501     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4502   }
4503 )
4504
4505 ;; Predicated highpart multiplication.
4506 (define_insn "@aarch64_pred_<optab><mode>"
4507   [(set (match_operand:SVE_I 0 "register_operand")
4508         (unspec:SVE_I
4509           [(match_operand:<VPRED> 1 "register_operand")
4510            (unspec:SVE_I
4511              [(match_operand:SVE_I 2 "register_operand")
4512               (match_operand:SVE_I 3 "register_operand")]
4513              MUL_HIGHPART)]
4514           UNSPEC_PRED_X))]
4515   "TARGET_SVE"
4516   {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
4517      [ w        , Upl , %0 , w ; *              ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4518      [ ?&w      , Upl , w  , w ; yes            ] movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4519   }
4520 )
4521
4522 ;; Predicated highpart multiplications with merging.
4523 (define_expand "@cond_<optab><mode>"
4524   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4525         (unspec:SVE_FULL_I
4526           [(match_operand:<VPRED> 1 "register_operand")
4527            (unspec:SVE_FULL_I
4528              [(match_operand:SVE_FULL_I 2 "register_operand")
4529               (match_operand:SVE_FULL_I 3 "register_operand")]
4530              MUL_HIGHPART)
4531            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4532           UNSPEC_SEL))]
4533   "TARGET_SVE"
4534 {
4535   /* Only target code is aware of these operations, so we don't need
4536      to handle the fully-general case.  */
4537   gcc_assert (rtx_equal_p (operands[2], operands[4])
4538               || CONSTANT_P (operands[4]));
4539 })
4540
4541 ;; Predicated highpart multiplications, merging with the first input.
4542 (define_insn "*cond_<optab><mode>_2"
4543   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4544         (unspec:SVE_FULL_I
4545           [(match_operand:<VPRED> 1 "register_operand")
4546            (unspec:SVE_FULL_I
4547              [(match_operand:SVE_FULL_I 2 "register_operand")
4548               (match_operand:SVE_FULL_I 3 "register_operand")]
4549              MUL_HIGHPART)
4550            (match_dup 2)]
4551           UNSPEC_SEL))]
4552   "TARGET_SVE"
4553   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4554      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4555      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4556   }
4557 )
4558
4559 ;; Predicated highpart multiplications, merging with zero.
4560 (define_insn "*cond_<optab><mode>_z"
4561   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4562         (unspec:SVE_FULL_I
4563           [(match_operand:<VPRED> 1 "register_operand")
4564            (unspec:SVE_FULL_I
4565              [(match_operand:SVE_FULL_I 2 "register_operand")
4566               (match_operand:SVE_FULL_I 3 "register_operand")]
4567              MUL_HIGHPART)
4568            (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4569           UNSPEC_SEL))]
4570   "TARGET_SVE"
4571   {@ [ cons: =0 , 1   , 2  , 3  ]
4572      [ &w       , Upl , %0 , w  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4573      [ &w       , Upl , w  , w  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4574   }
4575   [(set_attr "movprfx" "yes")])
4576
4577 ;; -------------------------------------------------------------------------
4578 ;; ---- [INT] Division
4579 ;; -------------------------------------------------------------------------
4580 ;; Includes:
4581 ;; - SDIV
4582 ;; - SDIVR
4583 ;; - UDIV
4584 ;; - UDIVR
4585 ;; -------------------------------------------------------------------------
4586
4587 ;; Unpredicated integer division.
4588 ;; SVE has vector integer divisions, unlike Advanced SIMD.
4589 ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
4590 ;; optabs to the midend.
4591 (define_expand "<optab><mode>3"
4592   [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
4593         (unspec:SVE_FULL_SDI_SIMD
4594           [(match_dup 3)
4595            (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
4596              (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
4597              (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
4598           UNSPEC_PRED_X))]
4599   "TARGET_SVE"
4600   {
4601     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4602   }
4603 )
4604
4605 ;; Integer division predicated with a PTRUE.
4606 (define_insn "@aarch64_pred_<optab><mode>"
4607   [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
4608         (unspec:SVE_FULL_SDI_SIMD
4609           [(match_operand:<VPRED> 1 "register_operand")
4610            (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
4611              (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
4612              (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
4613           UNSPEC_PRED_X))]
4614   "TARGET_SVE"
4615   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4616      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
4617      [ w        , Upl , w , 0 ; *              ] <sve_int_op>r\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z2.<Vetype>
4618      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
4619   }
4620 )
4621
4622 ;; Predicated integer division with merging.
4623 (define_expand "@cond_<optab><mode>"
4624   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4625         (unspec:SVE_FULL_SDI
4626           [(match_operand:<VPRED> 1 "register_operand")
4627            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4628              (match_operand:SVE_FULL_SDI 2 "register_operand")
4629              (match_operand:SVE_FULL_SDI 3 "register_operand"))
4630            (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4631           UNSPEC_SEL))]
4632   "TARGET_SVE"
4633 )
4634
4635 ;; Predicated integer division, merging with the first input.
4636 (define_insn "*cond_<optab><mode>_2"
4637   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4638         (unspec:SVE_FULL_SDI
4639           [(match_operand:<VPRED> 1 "register_operand")
4640            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4641              (match_operand:SVE_FULL_SDI 2 "register_operand")
4642              (match_operand:SVE_FULL_SDI 3 "register_operand"))
4643            (match_dup 2)]
4644           UNSPEC_SEL))]
4645   "TARGET_SVE"
4646   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4647      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4648      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4649   }
4650 )
4651
4652 ;; Predicated integer division, merging with the second input.
4653 (define_insn "*cond_<optab><mode>_3"
4654   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4655         (unspec:SVE_FULL_SDI
4656           [(match_operand:<VPRED> 1 "register_operand")
4657            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4658              (match_operand:SVE_FULL_SDI 2 "register_operand")
4659              (match_operand:SVE_FULL_SDI 3 "register_operand"))
4660            (match_dup 3)]
4661           UNSPEC_SEL))]
4662   "TARGET_SVE"
4663   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4664      [ w        , Upl , w , 0 ; *              ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4665      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4666   }
4667 )
4668
4669 ;; Predicated integer division, merging with an independent value.
4670 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4671   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4672         (unspec:SVE_FULL_SDI
4673           [(match_operand:<VPRED> 1 "register_operand")
4674            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4675              (match_operand:SVE_FULL_SDI 2 "register_operand")
4676              (match_operand:SVE_FULL_SDI 3 "register_operand"))
4677            (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4678           UNSPEC_SEL))]
4679   "TARGET_SVE
4680    && !rtx_equal_p (operands[2], operands[4])
4681    && !rtx_equal_p (operands[3], operands[4])"
4682   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
4683      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4684      [ &w       , Upl , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4685      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4686      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4687      [ ?&w      , Upl , w , w , w   ] #
4688   }
4689   "&& reload_completed
4690    && register_operand (operands[4], <MODE>mode)
4691    && !rtx_equal_p (operands[0], operands[4])"
4692   {
4693     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4694                                              operands[4], operands[1]));
4695     operands[4] = operands[2] = operands[0];
4696   }
4697   [(set_attr "movprfx" "yes")]
4698 )
4699
4700 ;; -------------------------------------------------------------------------
4701 ;; ---- [INT] Binary logical operations
4702 ;; -------------------------------------------------------------------------
4703 ;; Includes:
4704 ;; - AND
4705 ;; - EOR
4706 ;; - ORR
4707 ;; -------------------------------------------------------------------------
4708
4709 ;; Unpredicated integer binary logical operations.
4710 (define_insn "<optab><mode>3"
4711   [(set (match_operand:SVE_I 0 "register_operand")
4712         (LOGICAL:SVE_I
4713           (match_operand:SVE_I 1 "register_operand")
4714           (match_operand:SVE_I 2 "aarch64_sve_logical_operand")))]
4715   "TARGET_SVE"
4716   {@ [ cons: =0 , 1  , 2   ; attrs: movprfx ]
4717      [ w        , %0 , vsl ; *              ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4718      [ ?w       , w  , vsl ; yes            ] movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4719      [ w        , w  , w   ; *              ] <logical>\t%0.d, %1.d, %2.d
4720   }
4721 )
4722
4723 ;; Merging forms are handled through SVE_INT_BINARY.
4724
4725 ;; -------------------------------------------------------------------------
4726 ;; ---- [INT] Binary logical operations (inverted second input)
4727 ;; -------------------------------------------------------------------------
4728 ;; Includes:
4729 ;; - BIC
4730 ;; -------------------------------------------------------------------------
4731
4732 ;; Unpredicated BIC; andn named pattern.
4733 (define_expand "andn<mode>3"
4734   [(set (match_operand:SVE_I 0 "register_operand")
4735         (and:SVE_I
4736           (unspec:SVE_I
4737             [(match_dup 3)
4738              (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4739             UNSPEC_PRED_X)
4740           (match_operand:SVE_I 1 "register_operand")))]
4741   "TARGET_SVE"
4742   {
4743     operands[3] = CONSTM1_RTX (<VPRED>mode);
4744   }
4745 )
4746
4747 ;; Predicated BIC.
4748 (define_insn_and_rewrite "*bic<mode>3"
4749   [(set (match_operand:SVE_I 0 "register_operand" "=w")
4750         (and:SVE_I
4751           (unspec:SVE_I
4752             [(match_operand 3)
4753              (not:SVE_I
4754                (match_operand:SVE_I 2 "register_operand" "w"))]
4755             UNSPEC_PRED_X)
4756           (match_operand:SVE_I 1 "register_operand" "w")))]
4757   "TARGET_SVE"
4758   "bic\t%0.d, %1.d, %2.d"
4759   "&& !CONSTANT_P (operands[3])"
4760   {
4761     operands[3] = CONSTM1_RTX (<VPRED>mode);
4762   }
4763 )
4764
4765 ;; Predicated BIC with merging.
4766 (define_expand "@cond_bic<mode>"
4767   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4768         (unspec:SVE_FULL_I
4769           [(match_operand:<VPRED> 1 "register_operand")
4770            (and:SVE_FULL_I
4771              (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4772              (match_operand:SVE_FULL_I 2 "register_operand"))
4773            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4774           UNSPEC_SEL))]
4775   "TARGET_SVE"
4776 )
4777
4778 ;; Predicated integer BIC, merging with the first input.
4779 (define_insn "*cond_bic<mode>_2"
4780   [(set (match_operand:SVE_I 0 "register_operand")
4781         (unspec:SVE_I
4782           [(match_operand:<VPRED> 1 "register_operand")
4783            (and:SVE_I
4784              (not:SVE_I
4785                (match_operand:SVE_I 3 "register_operand"))
4786              (match_operand:SVE_I 2 "register_operand"))
4787            (match_dup 2)]
4788           UNSPEC_SEL))]
4789   "TARGET_SVE"
4790   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
4791      [ w        , Upl , 0 , w ; *              ] bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4792      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4793   }
4794 )
4795
4796 ;; Predicated integer BIC, merging with an independent value.
4797 (define_insn_and_rewrite "*cond_bic<mode>_any"
4798   [(set (match_operand:SVE_I 0 "register_operand")
4799         (unspec:SVE_I
4800           [(match_operand:<VPRED> 1 "register_operand")
4801            (and:SVE_I
4802              (not:SVE_I
4803                (match_operand:SVE_I 3 "register_operand"))
4804              (match_operand:SVE_I 2 "register_operand"))
4805            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4806           UNSPEC_SEL))]
4807   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4808   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
4809      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4810      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4811      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4812      [ ?&w      , Upl , w , w , w   ] #
4813   }
4814   "&& reload_completed
4815    && register_operand (operands[4], <MODE>mode)
4816    && !rtx_equal_p (operands[0], operands[4])"
4817   {
4818     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4819                                              operands[4], operands[1]));
4820     operands[4] = operands[2] = operands[0];
4821   }
4822   [(set_attr "movprfx" "yes")]
4823 )
4824
4825 ;; -------------------------------------------------------------------------
4826 ;; ---- [INT] Shifts (rounding towards -Inf)
4827 ;; -------------------------------------------------------------------------
4828 ;; Includes:
4829 ;; - ASR
4830 ;; - ASRR
4831 ;; - LSL
4832 ;; - LSLR
4833 ;; - LSR
4834 ;; - LSRR
4835 ;; -------------------------------------------------------------------------
4836
4837 ;; Unpredicated shift by a scalar, which expands into one of the vector
4838 ;; shifts below.
4839 (define_expand "<ASHIFT:optab><mode>3"
4840   [(set (match_operand:SVE_I 0 "register_operand")
4841         (ASHIFT:SVE_I
4842           (match_operand:SVE_I 1 "register_operand")
4843           (match_operand:<VEL> 2 "general_operand")))]
4844   "TARGET_SVE"
4845   {
4846     rtx amount;
4847     if (CONST_INT_P (operands[2]))
4848       {
4849         amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4850         if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4851           amount = force_reg (<MODE>mode, amount);
4852       }
4853     else
4854       {
4855         amount = convert_to_mode (<VEL>mode, operands[2], 0);
4856         amount = expand_vector_broadcast (<MODE>mode, amount);
4857       }
4858     emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4859     DONE;
4860   }
4861 )
4862
4863 ;; Unpredicated shift by a vector.
4864 (define_expand "v<optab><mode>3"
4865   [(set (match_operand:SVE_I 0 "register_operand")
4866         (unspec:SVE_I
4867           [(match_dup 3)
4868            (ASHIFT:SVE_I
4869              (match_operand:SVE_I 1 "register_operand")
4870              (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
4871           UNSPEC_PRED_X))]
4872   "TARGET_SVE"
4873   {
4874     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4875   }
4876 )
4877
4878 ;; Shift by a vector, predicated with a PTRUE.  We don't actually need
4879 ;; the predicate for the first alternative, but using Upa or X isn't
4880 ;; likely to gain much and would make the instruction seem less uniform
4881 ;; to the register allocator.
4882 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4883   [(set (match_operand:SVE_I 0 "register_operand")
4884         (unspec:SVE_I
4885           [(match_operand:<VPRED> 1 "register_operand")
4886            (ASHIFT:SVE_I
4887              (match_operand:SVE_I 2 "register_operand")
4888              (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand"))]
4889           UNSPEC_PRED_X))]
4890   "TARGET_SVE"
4891   {@ [ cons: =0 , 1   , 2 , 3     ; attrs: movprfx ]
4892      [ w        , Upl , w , D<lr> ; *              ] #
4893      [ w        , Upl , 0 , w     ; *              ] <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4894      [ w        , Upl , w , 0     ; *              ] <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4895      [ ?&w      , Upl , w , w     ; yes            ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4896   }
4897   "&& reload_completed
4898    && !register_operand (operands[3], <MODE>mode)"
4899   [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
4900   ""
4901 )
4902
4903 ;; Unpredicated shift operations by a constant (post-RA only).
4904 ;; These are generated by splitting a predicated instruction whose
4905 ;; predicate is unused.
4906 (define_insn "*post_ra_v_ashl<mode>3"
4907   [(set (match_operand:SVE_I 0 "register_operand")
4908         (ashift:SVE_I
4909           (match_operand:SVE_I 1 "register_operand")
4910           (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
4911   "TARGET_SVE && reload_completed"
4912   {@ [ cons: =0 , 1 , 2   ]
4913      [ w        , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
4914      [ w        , w , Dl  ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
4915   }
4916 )
4917
4918 (define_insn "*post_ra_v_<optab><mode>3"
4919   [(set (match_operand:SVE_I 0 "register_operand" "=w")
4920         (SHIFTRT:SVE_I
4921           (match_operand:SVE_I 1 "register_operand" "w")
4922           (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
4923   "TARGET_SVE && reload_completed"
4924   "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4925 )
4926
4927 ;; Predicated integer shift, merging with the first input.
4928 (define_insn "*cond_<optab><mode>_2_const"
4929   [(set (match_operand:SVE_I 0 "register_operand")
4930         (unspec:SVE_I
4931           [(match_operand:<VPRED> 1 "register_operand")
4932            (ASHIFT:SVE_I
4933              (match_operand:SVE_I 2 "register_operand")
4934              (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4935            (match_dup 2)]
4936          UNSPEC_SEL))]
4937   "TARGET_SVE"
4938   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
4939      [ w        , Upl , 0 ; *              ] <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4940      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4941   }
4942 )
4943
4944 ;; Predicated integer shift, merging with an independent value.
4945 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4946   [(set (match_operand:SVE_I 0 "register_operand")
4947         (unspec:SVE_I
4948           [(match_operand:<VPRED> 1 "register_operand")
4949            (ASHIFT:SVE_I
4950              (match_operand:SVE_I 2 "register_operand")
4951              (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4952            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4953          UNSPEC_SEL))]
4954   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4955   {@ [ cons: =0 , 1   , 2 , 4   ]
4956      [ w        , Upl , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4957      [ &w       , Upl , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4958      [ ?&w      , Upl , w , w   ] #
4959   }
4960   "&& reload_completed
4961    && register_operand (operands[4], <MODE>mode)
4962    && !rtx_equal_p (operands[0], operands[4])"
4963   {
4964     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4965                                              operands[4], operands[1]));
4966     operands[4] = operands[2] = operands[0];
4967   }
4968   [(set_attr "movprfx" "yes")]
4969 )
4970
4971 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
4972 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4973   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4974         (unspec:SVE_FULL_BHSI
4975           [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4976            (match_operand:VNx2DI 2 "register_operand" "w")]
4977           SVE_SHIFT_WIDE))]
4978   "TARGET_SVE"
4979   "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4980 )
4981
4982 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4983 (define_expand "@cond_<sve_int_op><mode>"
4984   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4985         (unspec:SVE_FULL_BHSI
4986           [(match_operand:<VPRED> 1 "register_operand")
4987            (unspec:SVE_FULL_BHSI
4988              [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4989               (match_operand:VNx2DI 3 "register_operand")]
4990              SVE_SHIFT_WIDE)
4991            (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4992           UNSPEC_SEL))]
4993   "TARGET_SVE"
4994 )
4995
4996 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with
4997 ;; the first input.
4998 (define_insn "*cond_<sve_int_op><mode>_m"
4999   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
5000         (unspec:SVE_FULL_BHSI
5001           [(match_operand:<VPRED> 1 "register_operand")
5002            (unspec:SVE_FULL_BHSI
5003              [(match_operand:SVE_FULL_BHSI 2 "register_operand")
5004               (match_operand:VNx2DI 3 "register_operand")]
5005              SVE_SHIFT_WIDE)
5006            (match_dup 2)]
5007          UNSPEC_SEL))]
5008   "TARGET_SVE"
5009   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5010      [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
5011      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
5012   }
5013 )
5014
5015 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
5016 (define_insn "*cond_<sve_int_op><mode>_z"
5017   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
5018         (unspec:SVE_FULL_BHSI
5019           [(match_operand:<VPRED> 1 "register_operand")
5020            (unspec:SVE_FULL_BHSI
5021              [(match_operand:SVE_FULL_BHSI 2 "register_operand")
5022               (match_operand:VNx2DI 3 "register_operand")]
5023              SVE_SHIFT_WIDE)
5024            (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
5025          UNSPEC_SEL))]
5026   "TARGET_SVE"
5027   {@ [ cons: =0 , 1   , 2 , 3  ]
5028      [ &w       , Upl , 0 , w  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
5029      [ &w       , Upl , w , w  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
5030   }
5031   [(set_attr "movprfx" "yes")])
5032
5033 ;; -------------------------------------------------------------------------
5034 ;; ---- [INT] Shifts (rounding towards 0)
5035 ;; -------------------------------------------------------------------------
5036 ;; Includes:
5037 ;; - ASRD
5038 ;; - SQSHLU (SVE2)
5039 ;; - SRSHR (SVE2)
5040 ;; - URSHR (SVE2)
5041 ;; -------------------------------------------------------------------------
5042
5043 ;; Unpredicated ASRD.
5044 (define_expand "sdiv_pow2<mode>3"
5045   [(set (match_operand:SVE_VDQ_I 0 "register_operand")
5046         (unspec:SVE_VDQ_I
5047           [(match_dup 3)
5048            (unspec:SVE_VDQ_I
5049              [(match_operand:SVE_VDQ_I 1 "register_operand")
5050               (match_operand 2 "aarch64_simd_rshift_imm")]
5051              UNSPEC_ASRD)]
5052          UNSPEC_PRED_X))]
5053   "TARGET_SVE"
5054   {
5055     operands[3] = aarch64_ptrue_reg (<VPRED>mode, <MODE>mode);
5056   }
5057 )
5058
5059 ;; Predicated ASRD.
5060 (define_insn "*sdiv_pow2<mode>3"
5061   [(set (match_operand:SVE_VDQ_I 0 "register_operand")
5062         (unspec:SVE_VDQ_I
5063           [(match_operand:<VPRED> 1 "register_operand")
5064            (unspec:SVE_VDQ_I
5065              [(match_operand:SVE_VDQ_I 2 "register_operand")
5066               (match_operand:SVE_VDQ_I 3 "aarch64_simd_rshift_imm")]
5067              UNSPEC_ASRD)]
5068           UNSPEC_PRED_X))]
5069   "TARGET_SVE"
5070   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
5071      [ w        , Upl , 0 ; *              ] asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
5072      [ ?&w      , Upl , w ; yes            ] movprfx\t%Z0, %Z2\;asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
5073   }
5074 )
5075
5076 ;; Predicated shift with merging.
5077 (define_expand "@cond_<sve_int_op><mode>"
5078   [(set (match_operand:SVE_I 0 "register_operand")
5079         (unspec:SVE_I
5080           [(match_operand:<VPRED> 1 "register_operand")
5081            (unspec:SVE_I
5082              [(match_dup 5)
5083               (unspec:SVE_I
5084                 [(match_operand:SVE_I 2 "register_operand")
5085                  (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5086                 SVE_INT_SHIFT_IMM)]
5087              UNSPEC_PRED_X)
5088            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
5089           UNSPEC_SEL))]
5090   "TARGET_SVE"
5091   {
5092     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
5093   }
5094 )
5095
5096 ;; Predicated shift, merging with the first input.
5097 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
5098   [(set (match_operand:SVE_I 0 "register_operand")
5099         (unspec:SVE_I
5100           [(match_operand:<VPRED> 1 "register_operand")
5101            (unspec:SVE_I
5102              [(match_operand 4)
5103               (unspec:SVE_I
5104                 [(match_operand:SVE_I 2 "register_operand")
5105                  (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5106                 SVE_INT_SHIFT_IMM)]
5107              UNSPEC_PRED_X)
5108            (match_dup 2)]
5109           UNSPEC_SEL))]
5110   "TARGET_SVE"
5111   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
5112      [ w        , Upl , 0 ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5113      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5114   }
5115   "&& !CONSTANT_P (operands[4])"
5116   {
5117     operands[4] = CONSTM1_RTX (<VPRED>mode);
5118   }
5119 )
5120
5121 ;; Predicated shift, merging with an independent value.
5122 (define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
5123   [(set (match_operand:SVE_I 0 "register_operand")
5124         (unspec:SVE_I
5125           [(match_operand:<VPRED> 1 "register_operand")
5126            (unspec:SVE_I
5127              [(match_operand 5)
5128               (unspec:SVE_I
5129                 [(match_operand:SVE_I 2 "register_operand")
5130                  (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
5131                 SVE_INT_SHIFT_IMM)]
5132              UNSPEC_PRED_X)
5133            (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
5134          UNSPEC_SEL))]
5135   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5136   {@ [ cons: =0 , 1   , 2 , 4   ]
5137      [ w        , Upl , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5138      [ &w       , Upl , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5139      [ ?&w      , Upl , w , w   ] #
5140   }
5141   "&& reload_completed
5142    && register_operand (operands[4], <MODE>mode)
5143    && !rtx_equal_p (operands[0], operands[4])"
5144   {
5145     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5146                                              operands[4], operands[1]));
5147     operands[4] = operands[2] = operands[0];
5148   }
5149   [(set_attr "movprfx" "yes")]
5150 )
5151
5152 ;; -------------------------------------------------------------------------
5153 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
5154 ;; -------------------------------------------------------------------------
5155 ;; Includes:
5156 ;; - FSCALE
5157 ;; - FTSMUL
5158 ;; - FTSSEL
5159 ;; -------------------------------------------------------------------------
5160
5161 (define_expand "ldexp<mode>3"
5162  [(set (match_operand:GPF_HF 0 "register_operand")
5163        (unspec:GPF_HF
5164          [(match_dup 3)
5165           (const_int SVE_STRICT_GP)
5166           (match_operand:GPF_HF 1 "register_operand")
5167           (match_operand:<V_INT_EQUIV> 2 "register_operand")]
5168          UNSPEC_COND_FSCALE))]
5169  "TARGET_SVE"
5170  {
5171    operands[3] = aarch64_ptrue_reg (<VPRED>mode,
5172                                     GET_MODE_UNIT_SIZE (<MODE>mode));
5173  }
5174 )
5175
5176 ;; Unpredicated floating-point binary operations that take an integer as
5177 ;; their second operand.
5178 (define_insn "@aarch64_sve_<optab><mode>"
5179   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5180         (unspec:SVE_FULL_F
5181           [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5182            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
5183           SVE_FP_BINARY_INT))]
5184   "TARGET_SVE"
5185   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5186 )
5187
5188 ;; Predicated floating-point binary operations that take an integer
5189 ;; as their second operand.
5190 (define_insn "@aarch64_pred_<optab><mode>"
5191   [(set (match_operand:SVE_FULL_F_SCALAR 0 "register_operand")
5192         (unspec:SVE_FULL_F_SCALAR
5193           [(match_operand:<VPRED> 1 "register_operand")
5194            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5195            (match_operand:SVE_FULL_F_SCALAR 2 "register_operand")
5196            (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5197           SVE_COND_FP_BINARY_INT))]
5198   "TARGET_SVE"
5199   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5200      [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
5201      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%Z0, %Z2\;<sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
5202   }
5203 )
5204
5205 ;; Predicated floating-point binary operations with merging, taking an
5206 ;; integer as their second operand.
5207 (define_expand "@cond_<optab><mode>"
5208   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5209         (unspec:SVE_FULL_F
5210           [(match_operand:<VPRED> 1 "register_operand")
5211            (unspec:SVE_FULL_F
5212              [(match_dup 1)
5213               (const_int SVE_STRICT_GP)
5214               (match_operand:SVE_FULL_F 2 "register_operand")
5215               (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5216              SVE_COND_FP_BINARY_INT)
5217            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5218           UNSPEC_SEL))]
5219   "TARGET_SVE"
5220 )
5221
5222 ;; Predicated floating-point binary operations that take an integer as their
5223 ;; second operand, with inactive lanes coming from the first operand.
5224 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5225   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5226         (unspec:SVE_FULL_F
5227           [(match_operand:<VPRED> 1 "register_operand")
5228            (unspec:SVE_FULL_F
5229              [(match_operand 4)
5230               (const_int SVE_RELAXED_GP)
5231               (match_operand:SVE_FULL_F 2 "register_operand")
5232               (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5233              SVE_COND_FP_BINARY_INT)
5234            (match_dup 2)]
5235           UNSPEC_SEL))]
5236   "TARGET_SVE"
5237   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5238      [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5239      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5240   }
5241   "&& !rtx_equal_p (operands[1], operands[4])"
5242   {
5243     operands[4] = copy_rtx (operands[1]);
5244   }
5245 )
5246
5247 (define_insn "*cond_<optab><mode>_2_strict"
5248   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5249         (unspec:SVE_FULL_F
5250           [(match_operand:<VPRED> 1 "register_operand")
5251            (unspec:SVE_FULL_F
5252              [(match_dup 1)
5253               (const_int SVE_STRICT_GP)
5254               (match_operand:SVE_FULL_F 2 "register_operand")
5255               (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5256              SVE_COND_FP_BINARY_INT)
5257            (match_dup 2)]
5258           UNSPEC_SEL))]
5259   "TARGET_SVE"
5260   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5261      [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5262      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5263   }
5264 )
5265
5266 ;; Predicated floating-point binary operations that take an integer as
5267 ;; their second operand, with the values of inactive lanes being distinct
5268 ;; from the other inputs.
5269 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5270   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5271         (unspec:SVE_FULL_F
5272           [(match_operand:<VPRED> 1 "register_operand")
5273            (unspec:SVE_FULL_F
5274              [(match_operand 5)
5275               (const_int SVE_RELAXED_GP)
5276               (match_operand:SVE_FULL_F 2 "register_operand")
5277               (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5278              SVE_COND_FP_BINARY_INT)
5279            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5280           UNSPEC_SEL))]
5281   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5282   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
5283      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5284      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5285      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5286      [ ?&w      , Upl , w , w , w   ] #
5287   }
5288   "&& 1"
5289   {
5290     if (reload_completed
5291         && register_operand (operands[4], <MODE>mode)
5292         && !rtx_equal_p (operands[0], operands[4]))
5293       {
5294         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5295                                                  operands[4], operands[1]));
5296         operands[4] = operands[2] = operands[0];
5297       }
5298     else if (!rtx_equal_p (operands[1], operands[5]))
5299       operands[5] = copy_rtx (operands[1]);
5300     else
5301       FAIL;
5302   }
5303   [(set_attr "movprfx" "yes")]
5304 )
5305
5306 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5307   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5308         (unspec:SVE_FULL_F
5309           [(match_operand:<VPRED> 1 "register_operand")
5310            (unspec:SVE_FULL_F
5311              [(match_dup 1)
5312               (const_int SVE_STRICT_GP)
5313               (match_operand:SVE_FULL_F 2 "register_operand")
5314               (match_operand:<V_INT_EQUIV> 3 "register_operand")]
5315              SVE_COND_FP_BINARY_INT)
5316            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5317           UNSPEC_SEL))]
5318   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5319   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
5320      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5321      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5322      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5323      [ ?&w      , Upl , w , w , w   ] #
5324   }
5325   "&& reload_completed
5326    && register_operand (operands[4], <MODE>mode)
5327    && !rtx_equal_p (operands[0], operands[4])"
5328   {
5329     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5330                                              operands[4], operands[1]));
5331     operands[4] = operands[2] = operands[0];
5332   }
5333   [(set_attr "movprfx" "yes")]
5334 )
5335
5336 ;; -------------------------------------------------------------------------
5337 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
5338 ;; -------------------------------------------------------------------------
5339 ;; Includes post-RA forms of:
5340 ;; - BFADD (SVE_B16B16)
5341 ;; - BFMUL (SVE_B16B16)
5342 ;; - BFSUB (SVE_B16B16)
5343 ;; - FADD
5344 ;; - FMUL
5345 ;; - FSUB
5346 ;; -------------------------------------------------------------------------
5347
5348 ;; Split a predicated instruction whose predicate is unused into an
5349 ;; unpredicated instruction.
5350 (define_split
5351   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5352         (unspec:SVE_FULL_F_BF
5353           [(match_operand:<VPRED> 1 "register_operand")
5354            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5355            (match_operand:SVE_FULL_F_BF 2 "register_operand")
5356            (match_operand:SVE_FULL_F_BF 3 "register_operand")]
5357           <SVE_COND_FP>))]
5358   "TARGET_SVE
5359    && reload_completed
5360    && INTVAL (operands[4]) == SVE_RELAXED_GP"
5361   [(set (match_dup 0)
5362         (SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF (match_dup 2) (match_dup 3)))]
5363 )
5364
5365 ;; Unpredicated floating-point binary operations (post-RA only).
5366 ;; These are generated by the split above.
5367 (define_insn "*post_ra_<sve_fp_op><mode>3"
5368   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
5369         (SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF
5370           (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")
5371           (match_operand:SVE_FULL_F_BF 2 "register_operand" "w")))]
5372   "TARGET_SVE && reload_completed"
5373   "<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
5374
5375 ;; -------------------------------------------------------------------------
5376 ;; ---- [FP] General binary arithmetic corresponding to unspecs
5377 ;; -------------------------------------------------------------------------
5378 ;; Includes merging forms of:
5379 ;; - BFADD   (SVE_B16B16)
5380 ;; - BFMAX   (SVE_B16B16)
5381 ;; - BFMAXNM (SVE_B16B16)
5382 ;; - BFMIN   (SVE_B16B16)
5383 ;; - BFMINNM (SVE_B16B16)
5384 ;; - BFMUL   (SVE_B16B16)
5385 ;; - BFSUB   (SVE_B16B16)
5386 ;; - FADD    (constant forms handled in the "Addition" section)
5387 ;; - FDIV
5388 ;; - FDIVR
5389 ;; - FMAX
5390 ;; - FMAXNM  (including #0.0 and #1.0)
5391 ;; - FMIN
5392 ;; - FMINNM  (including #0.0 and #1.0)
5393 ;; - FMUL    (including #0.5 and #2.0)
5394 ;; - FMULX
5395 ;; - FRECPS
5396 ;; - FRSQRTS
5397 ;; - FSUB    (constant forms handled in the "Addition" section)
5398 ;; - FSUBR   (constant forms handled in the "Subtraction" section)
5399 ;; -------------------------------------------------------------------------
5400
5401 ;; Unpredicated floating-point binary operations.
5402 (define_insn "@aarch64_sve_<optab><mode>"
5403   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5404         (unspec:SVE_FULL_F
5405           [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5406            (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5407           SVE_FP_BINARY))]
5408   "TARGET_SVE"
5409   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
5410 )
5411
5412 ;; Unpredicated floating-point binary operations that need to be predicated
5413 ;; for SVE.
5414 (define_expand "<optab><mode>3"
5415   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5416         (unspec:SVE_FULL_F_BF
5417           [(match_dup 3)
5418            (const_int SVE_RELAXED_GP)
5419            (match_operand:SVE_FULL_F_BF 1 "<sve_pred_fp_rhs1_operand>")
5420            (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs2_operand>")]
5421           SVE_COND_FP_BINARY_OPTAB))]
5422   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
5423   {
5424     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5425   }
5426 )
5427
5428 ;; Predicated floating-point binary operations that have no immediate forms.
5429 (define_insn "@aarch64_pred_<optab><mode>"
5430   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5431         (unspec:SVE_FULL_F
5432           [(match_operand:<VPRED> 1 "register_operand")
5433            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5434            (match_operand:SVE_FULL_F 2 "register_operand")
5435            (match_operand:SVE_FULL_F 3 "register_operand")]
5436           SVE_COND_FP_BINARY_REG))]
5437   "TARGET_SVE"
5438   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5439      [ w        , Upl , 0 , w ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5440      [ w        , Upl , w , 0 ; *              ] <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5441      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5442   }
5443 )
5444
5445 ;; Predicated floating-point operations with merging.
5446 (define_expand "@cond_<optab><mode>"
5447   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5448         (unspec:SVE_FULL_F_BF
5449           [(match_operand:<VPRED> 1 "register_operand")
5450            (unspec:SVE_FULL_F_BF
5451              [(match_dup 1)
5452               (const_int SVE_STRICT_GP)
5453               (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs1_operand>")
5454               (match_operand:SVE_FULL_F_BF 3 "<sve_pred_fp_rhs2_operand>")]
5455              SVE_COND_FP_BINARY)
5456            (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
5457           UNSPEC_SEL))]
5458   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
5459 )
5460
5461 ;; Predicated floating-point operations, merging with the first input.
5462 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5463   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5464         (unspec:SVE_FULL_F_BF
5465           [(match_operand:<VPRED> 1 "register_operand")
5466            (unspec:SVE_FULL_F_BF
5467              [(match_operand 4)
5468               (const_int SVE_RELAXED_GP)
5469               (match_operand:SVE_FULL_F_BF 2 "register_operand")
5470               (match_operand:SVE_FULL_F_BF 3 "register_operand")]
5471              SVE_COND_FP_BINARY)
5472            (match_dup 2)]
5473           UNSPEC_SEL))]
5474   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
5475   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5476      [ w        , Upl , 0 , w ; *              ] <b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5477      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5478   }
5479   "&& !rtx_equal_p (operands[1], operands[4])"
5480   {
5481     operands[4] = copy_rtx (operands[1]);
5482   }
5483 )
5484
5485 (define_insn "*cond_<optab><mode>_2_strict"
5486   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5487         (unspec:SVE_FULL_F_BF
5488           [(match_operand:<VPRED> 1 "register_operand")
5489            (unspec:SVE_FULL_F_BF
5490              [(match_dup 1)
5491               (const_int SVE_STRICT_GP)
5492               (match_operand:SVE_FULL_F_BF 2 "register_operand")
5493               (match_operand:SVE_FULL_F_BF 3 "register_operand")]
5494              SVE_COND_FP_BINARY)
5495            (match_dup 2)]
5496           UNSPEC_SEL))]
5497   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
5498   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5499      [ w        , Upl , 0 , w ; *              ] <b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5500      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5501   }
5502 )
5503
5504 ;; Same for operations that take a 1-bit constant.
5505 (define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
5506   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5507         (unspec:SVE_FULL_F
5508           [(match_operand:<VPRED> 1 "register_operand")
5509            (unspec:SVE_FULL_F
5510              [(match_operand 4)
5511               (const_int SVE_RELAXED_GP)
5512               (match_operand:SVE_FULL_F 2 "register_operand")
5513               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5514              SVE_COND_FP_BINARY_I1)
5515            (match_dup 2)]
5516           UNSPEC_SEL))]
5517   "TARGET_SVE"
5518   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
5519      [ w        , Upl , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5520      [ ?w       , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5521   }
5522   "&& !rtx_equal_p (operands[1], operands[4])"
5523   {
5524     operands[4] = copy_rtx (operands[1]);
5525   }
5526 )
5527
5528 (define_insn "*cond_<optab><mode>_2_const_strict"
5529   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5530         (unspec:SVE_FULL_F
5531           [(match_operand:<VPRED> 1 "register_operand")
5532            (unspec:SVE_FULL_F
5533              [(match_dup 1)
5534               (const_int SVE_STRICT_GP)
5535               (match_operand:SVE_FULL_F 2 "register_operand")
5536               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5537              SVE_COND_FP_BINARY_I1)
5538            (match_dup 2)]
5539           UNSPEC_SEL))]
5540   "TARGET_SVE"
5541   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
5542      [ w        , Upl , 0 ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5543      [ ?w       , Upl , w ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5544   }
5545 )
5546
5547 ;; Predicated floating-point operations, merging with the second input.
5548 (define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
5549   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5550         (unspec:SVE_FULL_F_BF
5551           [(match_operand:<VPRED> 1 "register_operand")
5552            (unspec:SVE_FULL_F_BF
5553              [(match_operand 4)
5554               (const_int SVE_RELAXED_GP)
5555               (match_operand:SVE_FULL_F_BF 2 "register_operand")
5556               (match_operand:SVE_FULL_F_BF 3 "register_operand")]
5557              SVE_COND_FP_BINARY)
5558            (match_dup 3)]
5559           UNSPEC_SEL))]
5560   "TARGET_SVE && (<supports_bf16_rev> || !<is_bf16>)"
5561   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5562      [ w        , Upl , w , 0 ; *              ] <b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5563      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5564   }
5565   "&& !rtx_equal_p (operands[1], operands[4])"
5566   {
5567     operands[4] = copy_rtx (operands[1]);
5568   }
5569 )
5570
5571 (define_insn "*cond_<optab><mode>_3_strict"
5572   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5573         (unspec:SVE_FULL_F_BF
5574           [(match_operand:<VPRED> 1 "register_operand")
5575            (unspec:SVE_FULL_F_BF
5576              [(match_dup 1)
5577               (const_int SVE_STRICT_GP)
5578               (match_operand:SVE_FULL_F_BF 2 "register_operand")
5579               (match_operand:SVE_FULL_F_BF 3 "register_operand")]
5580              SVE_COND_FP_BINARY)
5581            (match_dup 3)]
5582           UNSPEC_SEL))]
5583   "TARGET_SVE && (<supports_bf16_rev> || !<is_bf16>)"
5584   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5585      [ w        , Upl , w , 0 ; *              ] <b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5586      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5587   }
5588 )
5589
5590 ;; Predicated floating-point operations, merging with an independent value.
5591 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5592   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5593         (unspec:SVE_FULL_F_BF
5594           [(match_operand:<VPRED> 1 "register_operand")
5595            (unspec:SVE_FULL_F_BF
5596              [(match_operand 5)
5597               (const_int SVE_RELAXED_GP)
5598               (match_operand:SVE_FULL_F_BF 2 "register_operand")
5599               (match_operand:SVE_FULL_F_BF 3 "register_operand")]
5600              SVE_COND_FP_BINARY)
5601            (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
5602           UNSPEC_SEL))]
5603   "TARGET_SVE
5604    && (<supports_bf16> || !<is_bf16>)
5605    && !rtx_equal_p (operands[2], operands[4])
5606    && !((<supports_bf16_rev> || !<is_bf16>)
5607         && rtx_equal_p (operands[3], operands[4]))"
5608   {@ [ cons: =0 , 1   , 2 , 3 , 4  ; attrs: is_rev ]
5609      [ &w       , Upl , 0 , w , Dz ; *    ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5610      [ &w       , Upl , w , 0 , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5611      [ &w       , Upl , w , w , Dz ; *    ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5612      [ &w       , Upl , w , w , 0  ; *    ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5613      [ ?&w      , Upl , w , w , w  ; *    ] #
5614   }
5615   "&& 1"
5616   {
5617     if (reload_completed
5618         && register_operand (operands[4], <MODE>mode)
5619         && !rtx_equal_p (operands[0], operands[4]))
5620       {
5621         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5622                                                  operands[4], operands[1]));
5623         operands[4] = operands[2] = operands[0];
5624       }
5625     else if (!rtx_equal_p (operands[1], operands[5]))
5626       operands[5] = copy_rtx (operands[1]);
5627     else
5628       FAIL;
5629   }
5630   [(set_attr "movprfx" "yes")
5631    (set_attr "is_bf16" "<is_bf16>")
5632    (set_attr "supports_bf16_rev" "<supports_bf16_rev>")]
5633 )
5634
5635 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
5636   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
5637         (unspec:SVE_FULL_F_BF
5638           [(match_operand:<VPRED> 1 "register_operand")
5639            (unspec:SVE_FULL_F_BF
5640              [(match_dup 1)
5641               (const_int SVE_STRICT_GP)
5642               (match_operand:SVE_FULL_F_BF 2 "register_operand")
5643               (match_operand:SVE_FULL_F_BF 3 "register_operand")]
5644              SVE_COND_FP_BINARY)
5645            (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
5646           UNSPEC_SEL))]
5647   "TARGET_SVE
5648    && (<supports_bf16> || !<is_bf16>)
5649    && !rtx_equal_p (operands[2], operands[4])
5650    && !((<supports_bf16_rev> || !<is_bf16>)
5651         && rtx_equal_p (operands[3], operands[4]))"
5652   {@ [ cons: =0 , 1   , 2 , 3 , 4  ; attrs: is_rev ]
5653      [ &w       , Upl , 0 , w , Dz ; *    ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5654      [ &w       , Upl , w , 0 , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5655      [ &w       , Upl , w , w , Dz ; *    ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5656      [ &w       , Upl , w , w , 0  ; *    ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5657      [ ?&w      , Upl , w , w , w  ; *    ] #
5658   }
5659   "&& reload_completed
5660    && register_operand (operands[4], <MODE>mode)
5661    && !rtx_equal_p (operands[0], operands[4])"
5662   {
5663     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5664                                              operands[4], operands[1]));
5665     operands[4] = operands[2] = operands[0];
5666   }
5667   [(set_attr "movprfx" "yes")
5668    (set_attr "is_bf16" "<is_bf16>")
5669    (set_attr "supports_bf16_rev" "<supports_bf16_rev>")]
5670 )
5671
5672 ;; Same for operations that take a 1-bit constant.
5673 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
5674   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5675         (unspec:SVE_FULL_F
5676           [(match_operand:<VPRED> 1 "register_operand")
5677            (unspec:SVE_FULL_F
5678              [(match_operand 5)
5679               (const_int SVE_RELAXED_GP)
5680               (match_operand:SVE_FULL_F 2 "register_operand")
5681               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5682              SVE_COND_FP_BINARY_I1)
5683            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5684           UNSPEC_SEL))]
5685   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5686   {@ [ cons: =0 , 1   , 2 , 4   ]
5687      [ w        , Upl , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5688      [ w        , Upl , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5689      [ ?w       , Upl , w , w   ] #
5690   }
5691   "&& 1"
5692   {
5693     if (reload_completed
5694         && register_operand (operands[4], <MODE>mode)
5695         && !rtx_equal_p (operands[0], operands[4]))
5696       {
5697         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5698                                                  operands[4], operands[1]));
5699         operands[4] = operands[2] = operands[0];
5700       }
5701     else if (!rtx_equal_p (operands[1], operands[5]))
5702       operands[5] = copy_rtx (operands[1]);
5703     else
5704       FAIL;
5705   }
5706   [(set_attr "movprfx" "yes")]
5707 )
5708
5709 (define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
5710   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5711         (unspec:SVE_FULL_F
5712           [(match_operand:<VPRED> 1 "register_operand")
5713            (unspec:SVE_FULL_F
5714              [(match_dup 1)
5715               (const_int SVE_STRICT_GP)
5716               (match_operand:SVE_FULL_F 2 "register_operand")
5717               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
5718              SVE_COND_FP_BINARY_I1)
5719            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5720           UNSPEC_SEL))]
5721   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5722   {@ [ cons: =0 , 1   , 2 , 4   ]
5723      [ w        , Upl , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5724      [ w        , Upl , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5725      [ ?w       , Upl , w , w   ] #
5726   }
5727   "&& reload_completed
5728    && register_operand (operands[4], <MODE>mode)
5729    && !rtx_equal_p (operands[0], operands[4])"
5730   {
5731     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5732                                              operands[4], operands[1]));
5733     operands[4] = operands[2] = operands[0];
5734   }
5735   [(set_attr "movprfx" "yes")]
5736 )
5737
5738 ;; -------------------------------------------------------------------------
5739 ;; ---- [FP] Addition
5740 ;; -------------------------------------------------------------------------
5741 ;; Includes:
5742 ;; - FADD
5743 ;; - FSUB
5744 ;; -------------------------------------------------------------------------
5745
5746 ;; Predicated floating-point addition.
5747 (define_insn "@aarch64_pred_<optab><mode>"
5748   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5749         (unspec:SVE_FULL_F
5750           [(match_operand:<VPRED> 1 "register_operand")
5751            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5752            (match_operand:SVE_FULL_F 2 "register_operand")
5753            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
5754           SVE_COND_FP_ADD))]
5755   "TARGET_SVE"
5756   {@ [ cons: =0 , 1   , 2  , 3   , 4   ; attrs: movprfx ]
5757      [ w        , Upl , %0 , vsA , i   ; *              ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5758      [ w        , Upl , 0  , vsN , i   ; *              ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5759      [ w        , Upl , w  , w   , Z   ; *              ] #
5760      [ w        , Upl , 0  , w   , Ui1 ; *              ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5761      [ ?&w      , Upl , w  , vsA , i   ; yes            ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5762      [ ?&w      , Upl , w  , vsN , i   ; yes            ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5763      [ ?&w      , Upl , w  , w   , Ui1 ; yes            ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5764   }
5765 )
5766
5767 ;; Predicated floating-point addition of a constant, merging with the
5768 ;; first input.
5769 (define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
5770   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5771         (unspec:SVE_FULL_F
5772           [(match_operand:<VPRED> 1 "register_operand")
5773            (unspec:SVE_FULL_F
5774              [(match_operand 4)
5775               (const_int SVE_RELAXED_GP)
5776               (match_operand:SVE_FULL_F 2 "register_operand")
5777               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5778              UNSPEC_COND_FADD)
5779            (match_dup 2)]
5780           UNSPEC_SEL))]
5781   "TARGET_SVE"
5782   {@ [ cons: =0 , 1   , 2 , 3   ; attrs: movprfx ]
5783      [ w        , Upl , 0 , vsA ; *              ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5784      [ w        , Upl , 0 , vsN ; *              ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5785      [ ?w       , Upl , w , vsA ; yes            ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5786      [ ?w       , Upl , w , vsN ; yes            ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5787   }
5788   "&& !rtx_equal_p (operands[1], operands[4])"
5789   {
5790     operands[4] = copy_rtx (operands[1]);
5791   }
5792 )
5793
5794 (define_insn "*cond_add<mode>_2_const_strict"
5795   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5796         (unspec:SVE_FULL_F
5797           [(match_operand:<VPRED> 1 "register_operand")
5798            (unspec:SVE_FULL_F
5799              [(match_dup 1)
5800               (const_int SVE_STRICT_GP)
5801               (match_operand:SVE_FULL_F 2 "register_operand")
5802               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5803              UNSPEC_COND_FADD)
5804            (match_dup 2)]
5805           UNSPEC_SEL))]
5806   "TARGET_SVE"
5807   {@ [ cons: =0 , 1   , 2 , 3   ; attrs: movprfx ]
5808      [ w        , Upl , 0 , vsA ; *              ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5809      [ w        , Upl , 0 , vsN ; *              ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5810      [ ?w       , Upl , w , vsA ; yes            ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5811      [ ?w       , Upl , w , vsN ; yes            ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5812   }
5813 )
5814
5815 ;; Predicated floating-point addition of a constant, merging with an
5816 ;; independent value.
5817 (define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
5818   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5819         (unspec:SVE_FULL_F
5820           [(match_operand:<VPRED> 1 "register_operand")
5821            (unspec:SVE_FULL_F
5822              [(match_operand 5)
5823               (const_int SVE_RELAXED_GP)
5824               (match_operand:SVE_FULL_F 2 "register_operand")
5825               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5826              UNSPEC_COND_FADD)
5827            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5828           UNSPEC_SEL))]
5829   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5830   {@ [ cons: =0 , 1   , 2 , 3   , 4   ]
5831      [ w        , Upl , w , vsA , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5832      [ w        , Upl , w , vsN , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5833      [ w        , Upl , w , vsA , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5834      [ w        , Upl , w , vsN , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5835      [ ?w       , Upl , w , vsA , w   ] #
5836      [ ?w       , Upl , w , vsN , w   ] #
5837   }
5838   "&& 1"
5839   {
5840     if (reload_completed
5841         && register_operand (operands[4], <MODE>mode)
5842         && !rtx_equal_p (operands[0], operands[4]))
5843       {
5844         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5845                                                  operands[4], operands[1]));
5846         operands[4] = operands[2] = operands[0];
5847       }
5848     else if (!rtx_equal_p (operands[1], operands[5]))
5849       operands[5] = copy_rtx (operands[1]);
5850     else
5851       FAIL;
5852   }
5853   [(set_attr "movprfx" "yes")]
5854 )
5855
5856 (define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
5857   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5858         (unspec:SVE_FULL_F
5859           [(match_operand:<VPRED> 1 "register_operand")
5860            (unspec:SVE_FULL_F
5861              [(match_dup 1)
5862               (const_int SVE_STRICT_GP)
5863               (match_operand:SVE_FULL_F 2 "register_operand")
5864               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
5865              UNSPEC_COND_FADD)
5866            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5867           UNSPEC_SEL))]
5868   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
5869   {@ [ cons: =0 , 1   , 2 , 3   , 4   ]
5870      [ w        , Upl , w , vsA , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5871      [ w        , Upl , w , vsN , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5872      [ w        , Upl , w , vsA , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5873      [ w        , Upl , w , vsN , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5874      [ ?w       , Upl , w , vsA , w   ] #
5875      [ ?w       , Upl , w , vsN , w   ] #
5876   }
5877   "&& reload_completed
5878    && register_operand (operands[4], <MODE>mode)
5879    && !rtx_equal_p (operands[0], operands[4])"
5880   {
5881     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5882                                              operands[4], operands[1]));
5883     operands[4] = operands[2] = operands[0];
5884   }
5885   [(set_attr "movprfx" "yes")]
5886 )
5887
5888 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5889
5890 ;; -------------------------------------------------------------------------
5891 ;; ---- [FP] Complex addition
5892 ;; -------------------------------------------------------------------------
5893 ;; Includes:
5894 ;; - FCADD
5895 ;; -------------------------------------------------------------------------
5896
5897 ;; Predicated FCADD.
5898 (define_insn "@aarch64_pred_<optab><mode>"
5899   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5900         (unspec:SVE_FULL_F
5901           [(match_operand:<VPRED> 1 "register_operand")
5902            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5903            (match_operand:SVE_FULL_F 2 "register_operand")
5904            (match_operand:SVE_FULL_F 3 "register_operand")]
5905           SVE_COND_FCADD))]
5906   "TARGET_SVE"
5907   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5908      [ w        , Upl , 0 , w ; *              ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5909      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5910   }
5911 )
5912
5913 ;; Predicated FCADD with merging.
5914 (define_expand "@cond_<optab><mode>"
5915   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5916         (unspec:SVE_FULL_F
5917           [(match_operand:<VPRED> 1 "register_operand")
5918            (unspec:SVE_FULL_F
5919              [(match_dup 1)
5920               (const_int SVE_STRICT_GP)
5921               (match_operand:SVE_FULL_F 2 "register_operand")
5922               (match_operand:SVE_FULL_F 3 "register_operand")]
5923              SVE_COND_FCADD)
5924            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5925           UNSPEC_SEL))]
5926   "TARGET_SVE"
5927 )
5928
5929 ;; Predicated FCADD using ptrue for unpredicated optab for auto-vectorizer
5930 (define_expand "@cadd<rot><mode>3"
5931   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5932         (unspec:SVE_FULL_F
5933           [(match_dup 3)
5934            (const_int SVE_RELAXED_GP)
5935            (match_operand:SVE_FULL_F 1 "register_operand")
5936            (match_operand:SVE_FULL_F 2 "register_operand")]
5937           SVE_COND_FCADD))]
5938   "TARGET_SVE"
5939 {
5940   operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5941 })
5942
5943 ;; Predicated FCADD, merging with the first input.
5944 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
5945   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5946         (unspec:SVE_FULL_F
5947           [(match_operand:<VPRED> 1 "register_operand")
5948            (unspec:SVE_FULL_F
5949              [(match_operand 4)
5950               (const_int SVE_RELAXED_GP)
5951               (match_operand:SVE_FULL_F 2 "register_operand")
5952               (match_operand:SVE_FULL_F 3 "register_operand")]
5953              SVE_COND_FCADD)
5954            (match_dup 2)]
5955           UNSPEC_SEL))]
5956   "TARGET_SVE"
5957   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5958      [ w        , Upl , 0 , w ; *              ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5959      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5960   }
5961   "&& !rtx_equal_p (operands[1], operands[4])"
5962   {
5963     operands[4] = copy_rtx (operands[1]);
5964   }
5965 )
5966
5967 (define_insn "*cond_<optab><mode>_2_strict"
5968   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5969         (unspec:SVE_FULL_F
5970           [(match_operand:<VPRED> 1 "register_operand")
5971            (unspec:SVE_FULL_F
5972              [(match_dup 1)
5973               (const_int SVE_STRICT_GP)
5974               (match_operand:SVE_FULL_F 2 "register_operand")
5975               (match_operand:SVE_FULL_F 3 "register_operand")]
5976              SVE_COND_FCADD)
5977            (match_dup 2)]
5978           UNSPEC_SEL))]
5979   "TARGET_SVE"
5980   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
5981      [ w        , Upl , 0 , w ; *              ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5982      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5983   }
5984 )
5985
5986 ;; Predicated FCADD, merging with an independent value.
5987 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
5988   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5989         (unspec:SVE_FULL_F
5990           [(match_operand:<VPRED> 1 "register_operand")
5991            (unspec:SVE_FULL_F
5992              [(match_operand 5)
5993               (const_int SVE_RELAXED_GP)
5994               (match_operand:SVE_FULL_F 2 "register_operand")
5995               (match_operand:SVE_FULL_F 3 "register_operand")]
5996              SVE_COND_FCADD)
5997            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5998           UNSPEC_SEL))]
5999   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
6000   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
6001      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
6002      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
6003      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
6004      [ ?&w      , Upl , w , w , w   ] #
6005   }
6006   "&& 1"
6007   {
6008     if (reload_completed
6009         && register_operand (operands[4], <MODE>mode)
6010         && !rtx_equal_p (operands[0], operands[4]))
6011       {
6012         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
6013                                                  operands[4], operands[1]));
6014         operands[4] = operands[2] = operands[0];
6015       }
6016     else if (!rtx_equal_p (operands[1], operands[5]))
6017       operands[5] = copy_rtx (operands[1]);
6018     else
6019       FAIL;
6020   }
6021   [(set_attr "movprfx" "yes")]
6022 )
6023
6024 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
6025   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6026         (unspec:SVE_FULL_F
6027           [(match_operand:<VPRED> 1 "register_operand")
6028            (unspec:SVE_FULL_F
6029              [(match_dup 1)
6030               (const_int SVE_STRICT_GP)
6031               (match_operand:SVE_FULL_F 2 "register_operand")
6032               (match_operand:SVE_FULL_F 3 "register_operand")]
6033              SVE_COND_FCADD)
6034            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6035           UNSPEC_SEL))]
6036   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
6037   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
6038      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
6039      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
6040      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
6041      [ ?&w      , Upl , w , w , w   ] #
6042   }
6043   "&& reload_completed
6044    && register_operand (operands[4], <MODE>mode)
6045    && !rtx_equal_p (operands[0], operands[4])"
6046   {
6047     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
6048                                              operands[4], operands[1]));
6049     operands[4] = operands[2] = operands[0];
6050   }
6051   [(set_attr "movprfx" "yes")]
6052 )
6053
6054 ;; -------------------------------------------------------------------------
6055 ;; ---- [FP] Subtraction
6056 ;; -------------------------------------------------------------------------
6057 ;; Includes:
6058 ;; - FSUB
6059 ;; - FSUBR
6060 ;; -------------------------------------------------------------------------
6061
6062 ;; Predicated floating-point subtraction.
6063 (define_insn "@aarch64_pred_<optab><mode>"
6064   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6065         (unspec:SVE_FULL_F
6066           [(match_operand:<VPRED> 1 "register_operand")
6067            (match_operand:SI 4 "aarch64_sve_gp_strictness")
6068            (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
6069            (match_operand:SVE_FULL_F 3 "register_operand")]
6070           SVE_COND_FP_SUB))]
6071   "TARGET_SVE"
6072   {@ [ cons: =0 , 1   , 2   , 3 , 4   ; attrs: movprfx ]
6073      [ w        , Upl , vsA , 0 , i   ; *              ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6074      [ w        , Upl , w   , w , Z   ; *              ] #
6075      [ w        , Upl , 0   , w , Ui1 ; *              ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6076      [ w        , Upl , w   , 0 , Ui1 ; *              ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6077      [ ?&w      , Upl , vsA , w , i   ; yes            ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6078      [ ?&w      , Upl , w   , w , Ui1 ; yes            ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6079   }
6080 )
6081
6082 ;; Predicated floating-point subtraction from a constant, merging with the
6083 ;; second input.
6084 (define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
6085   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6086         (unspec:SVE_FULL_F
6087           [(match_operand:<VPRED> 1 "register_operand")
6088            (unspec:SVE_FULL_F
6089              [(match_operand 4)
6090               (const_int SVE_RELAXED_GP)
6091               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
6092               (match_operand:SVE_FULL_F 3 "register_operand")]
6093              UNSPEC_COND_FSUB)
6094            (match_dup 3)]
6095           UNSPEC_SEL))]
6096   "TARGET_SVE"
6097   {@ [ cons: =0 , 1   , 3 ; attrs: movprfx ]
6098      [ w        , Upl , 0 ; *              ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6099      [ ?w       , Upl , w ; yes            ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6100   }
6101   "&& !rtx_equal_p (operands[1], operands[4])"
6102   {
6103     operands[4] = copy_rtx (operands[1]);
6104   }
6105 )
6106
6107 (define_insn "*cond_sub<mode>_3_const_strict"
6108   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6109         (unspec:SVE_FULL_F
6110           [(match_operand:<VPRED> 1 "register_operand")
6111            (unspec:SVE_FULL_F
6112              [(match_dup 1)
6113               (const_int SVE_STRICT_GP)
6114               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
6115               (match_operand:SVE_FULL_F 3 "register_operand")]
6116              UNSPEC_COND_FSUB)
6117            (match_dup 3)]
6118           UNSPEC_SEL))]
6119   "TARGET_SVE"
6120   {@ [ cons: =0 , 1   , 3 ; attrs: movprfx ]
6121      [ w        , Upl , 0 ; *              ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6122      [ ?w       , Upl , w ; yes            ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6123   }
6124 )
6125
6126 ;; Predicated floating-point subtraction from a constant, merging with an
6127 ;; independent value.
6128 (define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
6129   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6130         (unspec:SVE_FULL_F
6131           [(match_operand:<VPRED> 1 "register_operand")
6132            (unspec:SVE_FULL_F
6133              [(match_operand 5)
6134               (const_int SVE_RELAXED_GP)
6135               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
6136               (match_operand:SVE_FULL_F 3 "register_operand")]
6137              UNSPEC_COND_FSUB)
6138            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6139           UNSPEC_SEL))]
6140   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
6141   {@ [ cons: =0 , 1   , 3 , 4   ]
6142      [ w        , Upl , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6143      [ w        , Upl , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6144      [ ?w       , Upl , w , w   ] #
6145   }
6146   "&& 1"
6147   {
6148     if (reload_completed
6149         && register_operand (operands[4], <MODE>mode)
6150         && !rtx_equal_p (operands[0], operands[4]))
6151       {
6152         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6153                                                  operands[4], operands[1]));
6154         operands[4] = operands[3] = operands[0];
6155       }
6156     else if (!rtx_equal_p (operands[1], operands[5]))
6157       operands[5] = copy_rtx (operands[1]);
6158     else
6159       FAIL;
6160   }
6161   [(set_attr "movprfx" "yes")]
6162 )
6163
6164 (define_insn_and_rewrite "*cond_sub<mode>_const_strict"
6165   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6166         (unspec:SVE_FULL_F
6167           [(match_operand:<VPRED> 1 "register_operand")
6168            (unspec:SVE_FULL_F
6169              [(match_dup 1)
6170               (const_int SVE_STRICT_GP)
6171               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
6172               (match_operand:SVE_FULL_F 3 "register_operand")]
6173              UNSPEC_COND_FSUB)
6174            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6175           UNSPEC_SEL))]
6176   "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
6177   {@ [ cons: =0 , 1   , 3 , 4   ]
6178      [ w        , Upl , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6179      [ w        , Upl , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
6180      [ ?w       , Upl , w , w   ] #
6181   }
6182   "&& reload_completed
6183    && register_operand (operands[4], <MODE>mode)
6184    && !rtx_equal_p (operands[0], operands[4])"
6185   {
6186     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6187                                              operands[4], operands[1]));
6188     operands[4] = operands[3] = operands[0];
6189   }
6190   [(set_attr "movprfx" "yes")]
6191 )
6192 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
6193
6194 ;; -------------------------------------------------------------------------
6195 ;; ---- [FP] Absolute difference
6196 ;; -------------------------------------------------------------------------
6197 ;; Includes:
6198 ;; - FABD
6199 ;; -------------------------------------------------------------------------
6200
6201 ;; Predicated floating-point absolute difference.
6202 (define_expand "@aarch64_pred_abd<mode>"
6203   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6204         (unspec:SVE_FULL_F
6205           [(match_operand:<VPRED> 1 "register_operand")
6206            (match_operand:SI 4 "aarch64_sve_gp_strictness")
6207            (unspec:SVE_FULL_F
6208              [(match_dup 1)
6209               (match_dup 4)
6210               (match_operand:SVE_FULL_F 2 "register_operand")
6211               (match_operand:SVE_FULL_F 3 "register_operand")]
6212              UNSPEC_COND_FSUB)]
6213           UNSPEC_COND_FABS))]
6214   "TARGET_SVE"
6215 )
6216
6217 ;; Predicated floating-point absolute difference.
6218 (define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
6219   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6220         (unspec:SVE_FULL_F
6221           [(match_operand:<VPRED> 1 "register_operand")
6222            (match_operand:SI 4 "aarch64_sve_gp_strictness")
6223            (unspec:SVE_FULL_F
6224              [(match_operand 5)
6225               (const_int SVE_RELAXED_GP)
6226               (match_operand:SVE_FULL_F 2 "register_operand")
6227               (match_operand:SVE_FULL_F 3 "register_operand")]
6228              UNSPEC_COND_FSUB)]
6229           UNSPEC_COND_FABS))]
6230   "TARGET_SVE"
6231   {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
6232      [ w        , Upl , %0 , w ; *              ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6233      [ ?&w      , Upl , w  , w ; yes            ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6234   }
6235   "&& !rtx_equal_p (operands[1], operands[5])"
6236   {
6237     operands[5] = copy_rtx (operands[1]);
6238   }
6239 )
6240
6241 (define_insn "*aarch64_pred_abd<mode>_strict"
6242   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6243         (unspec:SVE_FULL_F
6244           [(match_operand:<VPRED> 1 "register_operand")
6245            (match_operand:SI 4 "aarch64_sve_gp_strictness")
6246            (unspec:SVE_FULL_F
6247              [(match_dup 1)
6248               (const_int SVE_STRICT_GP)
6249               (match_operand:SVE_FULL_F 2 "register_operand")
6250               (match_operand:SVE_FULL_F 3 "register_operand")]
6251              UNSPEC_COND_FSUB)]
6252           UNSPEC_COND_FABS))]
6253   "TARGET_SVE"
6254   {@ [ cons: =0 , 1   , 2  , 3 ; attrs: movprfx ]
6255      [ w        , Upl , %0 , w ; *              ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6256      [ ?&w      , Upl , w  , w ; yes            ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6257   }
6258 )
6259
6260 (define_expand "@aarch64_cond_abd<mode>"
6261   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6262         (unspec:SVE_FULL_F
6263           [(match_operand:<VPRED> 1 "register_operand")
6264            (unspec:SVE_FULL_F
6265              [(match_dup 1)
6266               (const_int SVE_STRICT_GP)
6267               (unspec:SVE_FULL_F
6268                 [(match_dup 1)
6269                  (const_int SVE_STRICT_GP)
6270                  (match_operand:SVE_FULL_F 2 "register_operand")
6271                  (match_operand:SVE_FULL_F 3 "register_operand")]
6272                 UNSPEC_COND_FSUB)]
6273              UNSPEC_COND_FABS)
6274            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6275           UNSPEC_SEL))]
6276   "TARGET_SVE"
6277 {
6278   if (rtx_equal_p (operands[3], operands[4]))
6279     std::swap (operands[2], operands[3]);
6280 })
6281
6282 ;; Predicated floating-point absolute difference, merging with the first
6283 ;; input.
6284 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
6285   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6286         (unspec:SVE_FULL_F
6287           [(match_operand:<VPRED> 1 "register_operand")
6288            (unspec:SVE_FULL_F
6289              [(match_operand 4)
6290               (const_int SVE_RELAXED_GP)
6291               (unspec:SVE_FULL_F
6292                 [(match_operand 5)
6293                  (const_int SVE_RELAXED_GP)
6294                  (match_operand:SVE_FULL_F 2 "register_operand")
6295                  (match_operand:SVE_FULL_F 3 "register_operand")]
6296                 UNSPEC_COND_FSUB)]
6297              UNSPEC_COND_FABS)
6298            (match_dup 2)]
6299           UNSPEC_SEL))]
6300   "TARGET_SVE"
6301   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
6302      [ w        , Upl , 0 , w ; *              ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6303      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6304   }
6305   "&& (!rtx_equal_p (operands[1], operands[4])
6306        || !rtx_equal_p (operands[1], operands[5]))"
6307   {
6308     operands[4] = copy_rtx (operands[1]);
6309     operands[5] = copy_rtx (operands[1]);
6310   }
6311 )
6312
6313 (define_insn "*aarch64_cond_abd<mode>_2_strict"
6314   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6315         (unspec:SVE_FULL_F
6316           [(match_operand:<VPRED> 1 "register_operand")
6317            (unspec:SVE_FULL_F
6318              [(match_dup 1)
6319               (match_operand:SI 4 "aarch64_sve_gp_strictness")
6320               (unspec:SVE_FULL_F
6321                 [(match_dup 1)
6322                  (match_operand:SI 5 "aarch64_sve_gp_strictness")
6323                  (match_operand:SVE_FULL_F 2 "register_operand")
6324                  (match_operand:SVE_FULL_F 3 "register_operand")]
6325                 UNSPEC_COND_FSUB)]
6326              UNSPEC_COND_FABS)
6327            (match_dup 2)]
6328           UNSPEC_SEL))]
6329   "TARGET_SVE"
6330   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
6331      [ w        , Upl , 0 , w ; *              ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6332      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6333   }
6334 )
6335
6336 ;; Predicated floating-point absolute difference, merging with the second
6337 ;; input.
6338 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
6339   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6340         (unspec:SVE_FULL_F
6341           [(match_operand:<VPRED> 1 "register_operand")
6342            (unspec:SVE_FULL_F
6343              [(match_operand 4)
6344               (const_int SVE_RELAXED_GP)
6345               (unspec:SVE_FULL_F
6346                 [(match_operand 5)
6347                  (const_int SVE_RELAXED_GP)
6348                  (match_operand:SVE_FULL_F 2 "register_operand")
6349                  (match_operand:SVE_FULL_F 3 "register_operand")]
6350                 UNSPEC_COND_FSUB)]
6351              UNSPEC_COND_FABS)
6352            (match_dup 3)]
6353           UNSPEC_SEL))]
6354   "TARGET_SVE"
6355   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
6356      [ w        , Upl , w , 0 ; *              ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6357      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6358   }
6359   "&& (!rtx_equal_p (operands[1], operands[4])
6360        || !rtx_equal_p (operands[1], operands[5]))"
6361   {
6362     operands[4] = copy_rtx (operands[1]);
6363     operands[5] = copy_rtx (operands[1]);
6364   }
6365 )
6366
6367 (define_insn "*aarch64_cond_abd<mode>_3_strict"
6368   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6369         (unspec:SVE_FULL_F
6370           [(match_operand:<VPRED> 1 "register_operand")
6371            (unspec:SVE_FULL_F
6372              [(match_dup 1)
6373               (match_operand:SI 4 "aarch64_sve_gp_strictness")
6374               (unspec:SVE_FULL_F
6375                 [(match_dup 1)
6376                  (match_operand:SI 5 "aarch64_sve_gp_strictness")
6377                  (match_operand:SVE_FULL_F 2 "register_operand")
6378                  (match_operand:SVE_FULL_F 3 "register_operand")]
6379                 UNSPEC_COND_FSUB)]
6380              UNSPEC_COND_FABS)
6381            (match_dup 3)]
6382           UNSPEC_SEL))]
6383   "TARGET_SVE"
6384   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
6385      [ w        , Upl , w , 0 ; *              ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6386      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6387   }
6388 )
6389
6390 ;; Predicated floating-point absolute difference, merging with an
6391 ;; independent value.
6392 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
6393   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6394         (unspec:SVE_FULL_F
6395           [(match_operand:<VPRED> 1 "register_operand")
6396            (unspec:SVE_FULL_F
6397              [(match_operand 5)
6398               (const_int SVE_RELAXED_GP)
6399               (unspec:SVE_FULL_F
6400                 [(match_operand 6)
6401                  (const_int SVE_RELAXED_GP)
6402                  (match_operand:SVE_FULL_F 2 "register_operand")
6403                  (match_operand:SVE_FULL_F 3 "register_operand")]
6404                 UNSPEC_COND_FSUB)]
6405              UNSPEC_COND_FABS)
6406            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6407           UNSPEC_SEL))]
6408   "TARGET_SVE
6409    && !rtx_equal_p (operands[2], operands[4])
6410    && !rtx_equal_p (operands[3], operands[4])"
6411   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
6412      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6413      [ &w       , Upl , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6414      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6415      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6416      [ ?&w      , Upl , w , w , w   ] #
6417   }
6418   "&& 1"
6419   {
6420     if (reload_completed
6421         && register_operand (operands[4], <MODE>mode)
6422         && !rtx_equal_p (operands[0], operands[4]))
6423       {
6424         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6425                                                  operands[4], operands[1]));
6426         operands[4] = operands[3] = operands[0];
6427       }
6428     else if (!rtx_equal_p (operands[1], operands[5])
6429              || !rtx_equal_p (operands[1], operands[6]))
6430       {
6431         operands[5] = copy_rtx (operands[1]);
6432         operands[6] = copy_rtx (operands[1]);
6433       }
6434     else
6435       FAIL;
6436   }
6437   [(set_attr "movprfx" "yes")]
6438 )
6439
6440 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
6441   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6442         (unspec:SVE_FULL_F
6443           [(match_operand:<VPRED> 1 "register_operand")
6444            (unspec:SVE_FULL_F
6445              [(match_dup 1)
6446               (match_operand:SI 5 "aarch64_sve_gp_strictness")
6447               (unspec:SVE_FULL_F
6448                 [(match_dup 1)
6449                  (match_operand:SI 6 "aarch64_sve_gp_strictness")
6450                  (match_operand:SVE_FULL_F 2 "register_operand")
6451                  (match_operand:SVE_FULL_F 3 "register_operand")]
6452                 UNSPEC_COND_FSUB)]
6453              UNSPEC_COND_FABS)
6454            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6455           UNSPEC_SEL))]
6456   "TARGET_SVE
6457    && !rtx_equal_p (operands[2], operands[4])
6458    && !rtx_equal_p (operands[3], operands[4])"
6459   {@ [ cons: =0 , 1   , 2 , 3 , 4   ]
6460      [ &w       , Upl , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6461      [ &w       , Upl , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
6462      [ &w       , Upl , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6463      [ &w       , Upl , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6464      [ ?&w      , Upl , w , w , w   ] #
6465   }
6466   "&& reload_completed
6467    && register_operand (operands[4], <MODE>mode)
6468    && !rtx_equal_p (operands[0], operands[4])"
6469   {
6470     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
6471                                              operands[4], operands[1]));
6472     operands[4] = operands[3] = operands[0];
6473   }
6474   [(set_attr "movprfx" "yes")]
6475 )
6476
6477 ;; -------------------------------------------------------------------------
6478 ;; ---- [FP] Multiplication
6479 ;; -------------------------------------------------------------------------
6480 ;; Includes:
6481 ;; - BFMUL (SVE_B16B16)
6482 ;; - FMUL
6483 ;; -------------------------------------------------------------------------
6484
6485 ;; Predicated floating-point multiplication.
6486 (define_insn "@aarch64_pred_<optab><mode>"
6487   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6488         (unspec:SVE_FULL_F
6489           [(match_operand:<VPRED> 1 "register_operand")
6490            (match_operand:SI 4 "aarch64_sve_gp_strictness")
6491            (match_operand:SVE_FULL_F 2 "register_operand")
6492            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
6493           SVE_COND_FP_MUL))]
6494   "TARGET_SVE"
6495   {@ [ cons: =0 , 1   , 2  , 3   , 4   ; attrs: movprfx ]
6496      [ w        , Upl , %0 , vsM , i   ; *              ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6497      [ w        , Upl , w  , w   , Z   ; *              ] #
6498      [ w        , Upl , 0  , w   , Ui1 ; *              ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6499      [ ?&w      , Upl , w  , vsM , i   ; yes            ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6500      [ ?&w      , Upl , w  , w   , Ui1 ; yes            ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6501   }
6502 )
6503
6504 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6505 ;; SVE_COND_FP_BINARY_I1.
6506
6507 ;; Unpredicated multiplication by selected lanes.
6508 (define_insn "@aarch64_mul_lane_<mode>"
6509   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
6510         (mult:SVE_FULL_F_BF
6511           (unspec:SVE_FULL_F_BF
6512             [(match_operand:SVE_FULL_F_BF 2 "register_operand" "<sve_lane_con>")
6513              (match_operand:SI 3 "const_int_operand")]
6514             UNSPEC_SVE_LANE_SELECT)
6515           (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")))]
6516   "TARGET_SVE"
6517   "<b>fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6518 )
6519
6520 ;; -------------------------------------------------------------------------
6521 ;; ---- [FP] Division
6522 ;; -------------------------------------------------------------------------
6523 ;; The patterns in this section are synthetic.
6524 ;; -------------------------------------------------------------------------
6525
6526 (define_expand "div<mode>3"
6527   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6528         (unspec:SVE_FULL_F
6529           [(match_dup 3)
6530            (const_int SVE_RELAXED_GP)
6531            (match_operand:SVE_FULL_F 1 "nonmemory_operand")
6532            (match_operand:SVE_FULL_F 2 "register_operand")]
6533           UNSPEC_COND_FDIV))]
6534   "TARGET_SVE"
6535   {
6536     if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
6537       DONE;
6538
6539     operands[1] = force_reg (<MODE>mode, operands[1]);
6540     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
6541   }
6542 )
6543
6544 (define_expand "@aarch64_frecpe<mode>"
6545   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6546         (unspec:SVE_FULL_F
6547           [(match_operand:SVE_FULL_F 1 "register_operand")]
6548           UNSPEC_FRECPE))]
6549   "TARGET_SVE"
6550 )
6551
6552 (define_expand "@aarch64_frecps<mode>"
6553   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6554         (unspec:SVE_FULL_F
6555           [(match_operand:SVE_FULL_F 1 "register_operand")
6556            (match_operand:SVE_FULL_F 2 "register_operand")]
6557           UNSPEC_FRECPS))]
6558   "TARGET_SVE"
6559 )
6560
6561 ;; -------------------------------------------------------------------------
6562 ;; ---- [FP] Binary logical operations
6563 ;; -------------------------------------------------------------------------
6564 ;; Includes
6565 ;; - AND
6566 ;; - EOR
6567 ;; - ORR
6568 ;; -------------------------------------------------------------------------
6569
6570 ;; Binary logical operations on floating-point modes.  We avoid subregs
6571 ;; by providing this, but we need to use UNSPECs since rtx logical ops
6572 ;; aren't defined for floating-point modes.
6573 (define_insn "*<optab><mode>3"
6574   [(set (match_operand:SVE_F 0 "register_operand" "=w")
6575         (unspec:SVE_F
6576           [(match_operand:SVE_F 1 "register_operand" "w")
6577            (match_operand:SVE_F 2 "register_operand" "w")]
6578           LOGICALF))]
6579   "TARGET_SVE"
6580   "<logicalf_op>\t%0.d, %1.d, %2.d"
6581 )
6582
6583 ;; -------------------------------------------------------------------------
6584 ;; ---- [FP] Sign copying
6585 ;; -------------------------------------------------------------------------
6586 ;; The patterns in this section are synthetic.
6587 ;; -------------------------------------------------------------------------
6588
6589 (define_expand "copysign<mode>3"
6590   [(match_operand:SVE_FULL_F 0 "register_operand")
6591    (match_operand:SVE_FULL_F 1 "register_operand")
6592    (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
6593   "TARGET_SVE"
6594   {
6595     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6596     rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6597     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6598     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6599
6600     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6601     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6602
6603     rtx v_sign_bitmask
6604       = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6605                                            HOST_WIDE_INT_M1U << bits);
6606
6607     /* copysign (x, -1) should instead be expanded as orr with the sign
6608        bit.  */
6609     if (!REG_P (operands[2]))
6610       {
6611         rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
6612         if (GET_CODE (op2_elt) == CONST_DOUBLE
6613             && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
6614           {
6615             emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
6616             emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6617             DONE;
6618           }
6619       }
6620
6621     operands[2] = force_reg (<MODE>mode, operands[2]);
6622     emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
6623     emit_insn (gen_and<v_int_equiv>3
6624                (mant, arg1,
6625                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6626                                                    ~(HOST_WIDE_INT_M1U
6627                                                      << bits))));
6628     emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
6629     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6630     DONE;
6631   }
6632 )
6633
6634 (define_expand "cond_copysign<mode>"
6635   [(match_operand:SVE_FULL_F 0 "register_operand")
6636    (match_operand:<VPRED> 1 "register_operand")
6637    (match_operand:SVE_FULL_F 2 "register_operand")
6638    (match_operand:SVE_FULL_F 3 "nonmemory_operand")
6639    (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6640   "TARGET_SVE"
6641   {
6642     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6643     rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6644     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6645     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6646
6647     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6648     rtx arg3 = lowpart_subreg (<V_INT_EQUIV>mode, operands[3], <MODE>mode);
6649     rtx arg4 = lowpart_subreg (<V_INT_EQUIV>mode, operands[4], <MODE>mode);
6650
6651     rtx v_sign_bitmask
6652       = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6653                                            HOST_WIDE_INT_M1U << bits);
6654
6655     /* copysign (x, -1) should instead be expanded as orr with the sign
6656        bit.  */
6657     if (!REG_P (operands[3]))
6658       {
6659         rtx op2_elt = unwrap_const_vec_duplicate (operands[3]);
6660         if (GET_CODE (op2_elt) == CONST_DOUBLE
6661             && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
6662           {
6663             arg3 = force_reg (<V_INT_EQUIV>mode, v_sign_bitmask);
6664             emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], arg2,
6665                                                   arg3, arg4));
6666             emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6667             DONE;
6668           }
6669       }
6670
6671     operands[2] = force_reg (<MODE>mode, operands[3]);
6672     emit_insn (gen_and<v_int_equiv>3 (sign, arg3, v_sign_bitmask));
6673     emit_insn (gen_and<v_int_equiv>3
6674                (mant, arg2,
6675                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6676                                                    ~(HOST_WIDE_INT_M1U
6677                                                      << bits))));
6678     emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], sign, mant,
6679                                           arg4));
6680     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6681     DONE;
6682   }
6683 )
6684
6685 (define_expand "xorsign<mode>3"
6686   [(match_operand:SVE_FULL_F 0 "register_operand")
6687    (match_operand:SVE_FULL_F 1 "register_operand")
6688    (match_operand:SVE_FULL_F 2 "register_operand")]
6689   "TARGET_SVE"
6690   {
6691     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6692     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6693     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6694
6695     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6696     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6697
6698     emit_insn (gen_and<v_int_equiv>3
6699                (sign, arg2,
6700                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6701                                                    HOST_WIDE_INT_M1U
6702                                                    << bits)));
6703     emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
6704     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6705     DONE;
6706   }
6707 )
6708
6709 ;; -------------------------------------------------------------------------
6710 ;; ---- [FP] Maximum and minimum
6711 ;; -------------------------------------------------------------------------
6712 ;; Includes:
6713 ;; - FMAX
6714 ;; - FMAXNM
6715 ;; - FMIN
6716 ;; - FMINNM
6717 ;; -------------------------------------------------------------------------
6718
6719 ;; Predicated floating-point maximum/minimum.
6720 (define_insn "@aarch64_pred_<optab><mode>"
6721   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6722         (unspec:SVE_FULL_F
6723           [(match_operand:<VPRED> 1 "register_operand")
6724            (match_operand:SI 4 "aarch64_sve_gp_strictness")
6725            (match_operand:SVE_FULL_F 2 "register_operand")
6726            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
6727           SVE_COND_FP_MAXMIN))]
6728   "TARGET_SVE"
6729   {@ [ cons: =0 , 1   , 2  , 3   ; attrs: movprfx ]
6730      [ w        , Upl , %0 , vsB ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6731      [ w        , Upl , 0  , w   ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6732      [ ?&w      , Upl , w  , vsB ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6733      [ ?&w      , Upl , w  , w   ; yes            ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6734   }
6735 )
6736
6737 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6738 ;; SVE_COND_FP_BINARY_I1.
6739
6740 ;; -------------------------------------------------------------------------
6741 ;; ---- [PRED] Binary logical operations
6742 ;; -------------------------------------------------------------------------
6743 ;; Includes:
6744 ;; - AND
6745 ;; - ANDS
6746 ;; - EOR
6747 ;; - EORS
6748 ;; - ORR
6749 ;; - ORRS
6750 ;; -------------------------------------------------------------------------
6751
6752 ;; Predicate AND.  We can reuse one of the inputs as the GP.
6753 ;; Doubling the second operand is the preferred implementation
6754 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
6755 (define_insn "and<mode>3"
6756   [(set (match_operand:PRED_ALL 0 "register_operand")
6757         (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
6758                       (match_operand:PRED_ALL 2 "register_operand")))]
6759   "TARGET_SVE"
6760   {@ [ cons: =0, 1   , 2   ; attrs: pred_clobber ]
6761      [ &Upa    , Upa , Upa ; yes                 ] and\t%0.b, %1/z, %2.b, %2.b
6762      [ ?Upa    , 0Upa, 0Upa; yes                 ] ^
6763      [ Upa     , Upa , Upa ; no                  ] ^
6764   }
6765 )
6766
6767 ;; Unpredicated predicate EOR and ORR.
6768 (define_expand "<optab><mode>3"
6769   [(set (match_operand:PRED_ALL 0 "register_operand")
6770         (and:PRED_ALL
6771           (LOGICAL_OR:PRED_ALL
6772             (match_operand:PRED_ALL 1 "register_operand")
6773             (match_operand:PRED_ALL 2 "register_operand"))
6774           (match_dup 3)))]
6775   "TARGET_SVE"
6776   {
6777     operands[3] = aarch64_ptrue_reg (<MODE>mode);
6778   }
6779 )
6780
6781 ;; Predicated predicate AND, EOR and ORR.
6782 (define_insn "@aarch64_pred_<optab><mode>_z"
6783   [(set (match_operand:PRED_ALL 0 "register_operand")
6784         (and:PRED_ALL
6785           (LOGICAL:PRED_ALL
6786             (match_operand:PRED_ALL 2 "register_operand")
6787             (match_operand:PRED_ALL 3 "register_operand"))
6788           (match_operand:PRED_ALL 1 "register_operand")))]
6789   "TARGET_SVE"
6790   {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
6791      [ &Upa    , Upa , Upa , Upa ; yes                 ] <logical>\t%0.b, %1/z, %2.b, %3.b
6792      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
6793      [ Upa     , Upa , Upa , Upa ; no                  ] ^
6794   }
6795 )
6796
6797 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
6798 ;; the GP.  Store the result in operand 0 and set the flags in the same
6799 ;; way as for PTEST.
6800 (define_insn "*<optab><mode>3_cc"
6801   [(set (reg:CC_NZC CC_REGNUM)
6802         (unspec:CC_NZC
6803           [(match_operand:VNx16BI 1 "register_operand")
6804            (match_operand 4)
6805            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6806            (and:PRED_ALL
6807              (LOGICAL:PRED_ALL
6808                (match_operand:PRED_ALL 2 "register_operand")
6809                (match_operand:PRED_ALL 3 "register_operand"))
6810              (match_dup 4))]
6811           UNSPEC_PTEST))
6812    (set (match_operand:PRED_ALL 0 "register_operand")
6813         (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
6814                       (match_dup 4)))]
6815   "TARGET_SVE"
6816   {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
6817      [ &Upa    , Upa , Upa , Upa ; yes                 ] <logical>s\t%0.b, %1/z, %2.b, %3.b
6818      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
6819      [ Upa     , Upa , Upa , Upa ; no                  ] ^
6820   }
6821 )
6822
6823 ;; Same with just the flags result.
6824 (define_insn "*<optab><mode>3_ptest"
6825   [(set (reg:CC_NZC CC_REGNUM)
6826         (unspec:CC_NZC
6827           [(match_operand:VNx16BI 1 "register_operand")
6828            (match_operand 4)
6829            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6830            (and:PRED_ALL
6831              (LOGICAL:PRED_ALL
6832                (match_operand:PRED_ALL 2 "register_operand")
6833                (match_operand:PRED_ALL 3 "register_operand"))
6834              (match_dup 4))]
6835           UNSPEC_PTEST))
6836    (clobber (match_scratch:VNx16BI 0))]
6837   "TARGET_SVE"
6838   {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
6839      [ &Upa    , Upa , Upa , Upa ; yes                 ] <logical>s\t%0.b, %1/z, %2.b, %3.b
6840      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
6841      [ Upa     , Upa , Upa , Upa ; no                  ] ^
6842   }
6843 )
6844
6845 ;; -------------------------------------------------------------------------
6846 ;; ---- [PRED] Binary logical operations (inverted second input)
6847 ;; -------------------------------------------------------------------------
6848 ;; Includes:
6849 ;; - BIC
6850 ;; - ORN
6851 ;; -------------------------------------------------------------------------
6852
6853 ;; Predicated predicate BIC and ORN.
6854 (define_insn "aarch64_pred_<nlogical><mode>_z"
6855   [(set (match_operand:PRED_ALL 0 "register_operand")
6856         (and:PRED_ALL
6857           (NLOGICAL:PRED_ALL
6858             (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand"))
6859             (match_operand:PRED_ALL 2 "register_operand"))
6860           (match_operand:PRED_ALL 1 "register_operand")))]
6861   "TARGET_SVE"
6862   {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
6863      [ &Upa    , Upa , Upa , Upa ; yes                 ] <nlogical>\t%0.b, %1/z, %2.b, %3.b
6864      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
6865      [ Upa     , Upa , Upa , Upa ; no                  ] ^
6866   }
6867 )
6868
6869 ;; Same, but set the flags as a side-effect.
6870 (define_insn "*<nlogical><mode>3_cc"
6871   [(set (reg:CC_NZC CC_REGNUM)
6872         (unspec:CC_NZC
6873           [(match_operand:VNx16BI 1 "register_operand")
6874            (match_operand 4)
6875            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6876            (and:PRED_ALL
6877              (NLOGICAL:PRED_ALL
6878                (not:PRED_ALL
6879                  (match_operand:PRED_ALL 3 "register_operand"))
6880                (match_operand:PRED_ALL 2 "register_operand"))
6881              (match_dup 4))]
6882           UNSPEC_PTEST))
6883    (set (match_operand:PRED_ALL 0 "register_operand")
6884         (and:PRED_ALL (NLOGICAL:PRED_ALL
6885                         (not:PRED_ALL (match_dup 3))
6886                         (match_dup 2))
6887                       (match_dup 4)))]
6888   "TARGET_SVE"
6889   {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
6890      [ &Upa    , Upa , Upa , Upa ; yes                 ] <nlogical>s\t%0.b, %1/z, %2.b, %3.b
6891      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
6892      [ Upa     , Upa , Upa , Upa ; no                  ] ^
6893   }
6894 )
6895
6896 ;; Same with just the flags result.
6897 (define_insn "*<nlogical><mode>3_ptest"
6898   [(set (reg:CC_NZC CC_REGNUM)
6899         (unspec:CC_NZC
6900           [(match_operand:VNx16BI 1 "register_operand")
6901            (match_operand 4)
6902            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6903            (and:PRED_ALL
6904              (NLOGICAL:PRED_ALL
6905                (not:PRED_ALL
6906                  (match_operand:PRED_ALL 3 "register_operand"))
6907                (match_operand:PRED_ALL 2 "register_operand"))
6908              (match_dup 4))]
6909           UNSPEC_PTEST))
6910    (clobber (match_scratch:VNx16BI 0))]
6911   "TARGET_SVE"
6912   {@ [ cons:  =0, 1   , 2   , 3   ; attrs: pred_clobber ]
6913      [ &Upa     , Upa , Upa , Upa ; yes                 ] <nlogical>s\t%0.b, %1/z, %2.b, %3.b
6914      [ ?Upa     , 0Upa, 0Upa, 0Upa; yes                 ] ^
6915      [ Upa      , Upa , Upa , Upa ; no                  ] ^
6916   }
6917 )
6918
6919 ;; -------------------------------------------------------------------------
6920 ;; ---- [PRED] Binary logical operations (inverted result)
6921 ;; -------------------------------------------------------------------------
6922 ;; Includes:
6923 ;; - NAND
6924 ;; - NOR
6925 ;; -------------------------------------------------------------------------
6926
6927 ;; Predicated predicate NAND and NOR.
6928 (define_insn "aarch64_pred_<logical_nn><mode>_z"
6929   [(set (match_operand:PRED_ALL 0 "register_operand")
6930         (and:PRED_ALL
6931           (NLOGICAL:PRED_ALL
6932             (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand"))
6933             (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand")))
6934           (match_operand:PRED_ALL 1 "register_operand")))]
6935   "TARGET_SVE"
6936   {@ [ cons: =0,  1  , 2   , 3   ; attrs: pred_clobber ]
6937      [ &Upa    , Upa , Upa , Upa ; yes                 ] <logical_nn>\t%0.b, %1/z, %2.b, %3.b
6938      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
6939      [ Upa     , Upa , Upa , Upa ; no                  ] ^
6940   }
6941 )
6942
6943 ;; Same, but set the flags as a side-effect.
6944 (define_insn "*<logical_nn><mode>3_cc"
6945   [(set (reg:CC_NZC CC_REGNUM)
6946         (unspec:CC_NZC
6947           [(match_operand:VNx16BI 1 "register_operand")
6948            (match_operand 4)
6949            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6950            (and:PRED_ALL
6951              (NLOGICAL:PRED_ALL
6952                (not:PRED_ALL
6953                  (match_operand:PRED_ALL 2 "register_operand"))
6954                (not:PRED_ALL
6955                  (match_operand:PRED_ALL 3 "register_operand")))
6956              (match_dup 4))]
6957           UNSPEC_PTEST))
6958    (set (match_operand:PRED_ALL 0 "register_operand")
6959         (and:PRED_ALL (NLOGICAL:PRED_ALL
6960                         (not:PRED_ALL (match_dup 2))
6961                         (not:PRED_ALL (match_dup 3)))
6962                       (match_dup 4)))]
6963   "TARGET_SVE"
6964   {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
6965      [ &Upa    , Upa , Upa , Upa ; yes                 ] <logical_nn>s\t%0.b, %1/z, %2.b, %3.b
6966      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
6967      [ Upa     , Upa , Upa , Upa ; no                  ] ^
6968   }
6969 )
6970
6971 ;; Same with just the flags result.
6972 (define_insn "*<logical_nn><mode>3_ptest"
6973   [(set (reg:CC_NZC CC_REGNUM)
6974         (unspec:CC_NZC
6975           [(match_operand:VNx16BI 1 "register_operand")
6976            (match_operand 4)
6977            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6978            (and:PRED_ALL
6979              (NLOGICAL:PRED_ALL
6980                (not:PRED_ALL
6981                  (match_operand:PRED_ALL 2 "register_operand"))
6982                (not:PRED_ALL
6983                  (match_operand:PRED_ALL 3 "register_operand")))
6984              (match_dup 4))]
6985           UNSPEC_PTEST))
6986    (clobber (match_scratch:VNx16BI 0))]
6987   "TARGET_SVE"
6988   {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
6989      [ &Upa    , Upa , Upa , Upa ; yes                 ] <logical_nn>s\t%0.b, %1/z, %2.b, %3.b
6990      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
6991      [ Upa     , Upa , Upa , Upa ; no                  ] ^
6992   }
6993 )
6994
6995 ;; =========================================================================
6996 ;; == Ternary arithmetic
6997 ;; =========================================================================
6998
6999 ;; -------------------------------------------------------------------------
7000 ;; ---- [INT] MLA and MAD
7001 ;; -------------------------------------------------------------------------
7002 ;; Includes:
7003 ;; - MAD
7004 ;; - MLA
7005 ;; -------------------------------------------------------------------------
7006
7007 ;; Unpredicated integer addition of product.
7008 (define_expand "fma<mode>4"
7009   [(set (match_operand:SVE_I 0 "register_operand")
7010         (plus:SVE_I
7011           (unspec:SVE_I
7012             [(match_dup 4)
7013              (mult:SVE_I
7014                (match_operand:SVE_I 1 "register_operand")
7015                (match_operand:SVE_I 2 "nonmemory_operand"))]
7016             UNSPEC_PRED_X)
7017           (match_operand:SVE_I 3 "register_operand")))]
7018   "TARGET_SVE"
7019   {
7020     if (aarch64_prepare_sve_int_fma (operands, PLUS))
7021       DONE;
7022     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7023   }
7024 )
7025
7026 ;; Predicated integer addition of product.
7027 (define_insn "@aarch64_pred_fma<mode>"
7028   [(set (match_operand:SVE_I 0 "register_operand")
7029         (plus:SVE_I
7030           (unspec:SVE_I
7031             [(match_operand:<VPRED> 1 "register_operand")
7032              (mult:SVE_I
7033                (match_operand:SVE_I 2 "register_operand")
7034                (match_operand:SVE_I 3 "register_operand"))]
7035             UNSPEC_PRED_X)
7036           (match_operand:SVE_I 4 "register_operand")))]
7037   "TARGET_SVE"
7038   {@ [ cons: =0 , 1   , 2  , 3 , 4 ; attrs: movprfx ]
7039      [ w        , Upl , %0 , w , w ; *              ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7040      [ w        , Upl , w  , w , 0 ; *              ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7041      [ ?&w      , Upl , w  , w , w ; yes            ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7042   }
7043 )
7044
7045 ;; Predicated integer addition of product with merging.
7046 (define_expand "cond_fma<mode>"
7047   [(set (match_operand:SVE_I 0 "register_operand")
7048         (unspec:SVE_I
7049           [(match_operand:<VPRED> 1 "register_operand")
7050            (plus:SVE_I
7051              (mult:SVE_I
7052                (match_operand:SVE_I 2 "register_operand")
7053                (match_operand:SVE_I 3 "general_operand"))
7054              (match_operand:SVE_I 4 "register_operand"))
7055            (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7056           UNSPEC_SEL))]
7057   "TARGET_SVE"
7058   {
7059     if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
7060       DONE;
7061     /* Swap the multiplication operands if the fallback value is the
7062        second of the two.  */
7063     if (rtx_equal_p (operands[3], operands[5]))
7064       std::swap (operands[2], operands[3]);
7065   }
7066 )
7067
7068 ;; Predicated integer addition of product, merging with the first input.
7069 (define_insn "*cond_fma<mode>_2"
7070   [(set (match_operand:SVE_I 0 "register_operand")
7071         (unspec:SVE_I
7072           [(match_operand:<VPRED> 1 "register_operand")
7073            (plus:SVE_I
7074              (mult:SVE_I
7075                (match_operand:SVE_I 2 "register_operand")
7076                (match_operand:SVE_I 3 "register_operand"))
7077              (match_operand:SVE_I 4 "register_operand"))
7078            (match_dup 2)]
7079           UNSPEC_SEL))]
7080   "TARGET_SVE"
7081   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7082      [ w        , Upl , 0 , w , w ; *              ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7083      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7084   }
7085 )
7086
7087 ;; Predicated integer addition of product, merging with the third input.
7088 (define_insn "*cond_fma<mode>_4"
7089   [(set (match_operand:SVE_I 0 "register_operand")
7090         (unspec:SVE_I
7091           [(match_operand:<VPRED> 1 "register_operand")
7092            (plus:SVE_I
7093              (mult:SVE_I
7094                (match_operand:SVE_I 2 "register_operand")
7095                (match_operand:SVE_I 3 "register_operand"))
7096              (match_operand:SVE_I 4 "register_operand"))
7097            (match_dup 4)]
7098           UNSPEC_SEL))]
7099   "TARGET_SVE"
7100   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7101      [ w        , Upl , w , w , 0 ; *              ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7102      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7103   }
7104 )
7105
7106 ;; Predicated integer addition of product, merging with an independent value.
7107 (define_insn_and_rewrite "*cond_fma<mode>_any"
7108   [(set (match_operand:SVE_I 0 "register_operand")
7109         (unspec:SVE_I
7110           [(match_operand:<VPRED> 1 "register_operand")
7111            (plus:SVE_I
7112              (mult:SVE_I
7113                (match_operand:SVE_I 2 "register_operand")
7114                (match_operand:SVE_I 3 "register_operand"))
7115              (match_operand:SVE_I 4 "register_operand"))
7116            (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7117           UNSPEC_SEL))]
7118   "TARGET_SVE
7119    && !rtx_equal_p (operands[2], operands[5])
7120    && !rtx_equal_p (operands[3], operands[5])
7121    && !rtx_equal_p (operands[4], operands[5])"
7122   {@ [ cons: =0 , 1   , 2 , 3 , 4 , 5   ]
7123      [ &w       , Upl , w , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7124      [ &w       , Upl , w , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7125      [ &w       , Upl , 0 , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7126      [ &w       , Upl , w , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7127      [ &w       , Upl , w , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7128      [ ?&w      , Upl , w , w , w , w   ] #
7129   }
7130   "&& reload_completed
7131    && register_operand (operands[5], <MODE>mode)
7132    && !rtx_equal_p (operands[0], operands[5])"
7133   {
7134     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7135                                              operands[5], operands[1]));
7136     operands[5] = operands[4] = operands[0];
7137   }
7138   [(set_attr "movprfx" "yes")]
7139 )
7140
7141 ;; -------------------------------------------------------------------------
7142 ;; ---- [INT] MLS and MSB
7143 ;; -------------------------------------------------------------------------
7144 ;; Includes:
7145 ;; - MLS
7146 ;; - MSB
7147 ;; -------------------------------------------------------------------------
7148
7149 ;; Unpredicated integer subtraction of product.
7150 (define_expand "fnma<mode>4"
7151   [(set (match_operand:SVE_I 0 "register_operand")
7152         (minus:SVE_I
7153           (match_operand:SVE_I 3 "register_operand")
7154           (unspec:SVE_I
7155             [(match_dup 4)
7156              (mult:SVE_I
7157                (match_operand:SVE_I 1 "register_operand")
7158                (match_operand:SVE_I 2 "general_operand"))]
7159             UNSPEC_PRED_X)))]
7160   "TARGET_SVE"
7161   {
7162     if (aarch64_prepare_sve_int_fma (operands, MINUS))
7163       DONE;
7164     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7165   }
7166 )
7167
7168 ;; Predicated integer subtraction of product.
7169 (define_insn "@aarch64_pred_fnma<mode>"
7170   [(set (match_operand:SVE_I 0 "register_operand")
7171         (minus:SVE_I
7172           (match_operand:SVE_I 4 "register_operand")
7173           (unspec:SVE_I
7174             [(match_operand:<VPRED> 1 "register_operand")
7175              (mult:SVE_I
7176                (match_operand:SVE_I 2 "register_operand")
7177                (match_operand:SVE_I 3 "register_operand"))]
7178             UNSPEC_PRED_X)))]
7179   "TARGET_SVE"
7180   {@ [ cons: =0 , 1   , 2  , 3 , 4 ; attrs: movprfx ]
7181      [ w        , Upl , %0 , w , w ; *              ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7182      [ w        , Upl , w  , w , 0 ; *              ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7183      [ ?&w      , Upl , w  , w , w ; yes            ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7184   }
7185 )
7186
7187 ;; Predicated integer subtraction of product with merging.
7188 (define_expand "cond_fnma<mode>"
7189   [(set (match_operand:SVE_I 0 "register_operand")
7190    (unspec:SVE_I
7191         [(match_operand:<VPRED> 1 "register_operand")
7192          (minus:SVE_I
7193            (match_operand:SVE_I 4 "register_operand")
7194            (mult:SVE_I
7195              (match_operand:SVE_I 2 "register_operand")
7196              (match_operand:SVE_I 3 "general_operand")))
7197          (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7198         UNSPEC_SEL))]
7199   "TARGET_SVE"
7200   {
7201     if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
7202       DONE;
7203     /* Swap the multiplication operands if the fallback value is the
7204        second of the two.  */
7205     if (rtx_equal_p (operands[3], operands[5]))
7206       std::swap (operands[2], operands[3]);
7207   }
7208 )
7209
7210 ;; Predicated integer subtraction of product, merging with the first input.
7211 (define_insn "*cond_fnma<mode>_2"
7212   [(set (match_operand:SVE_I 0 "register_operand")
7213         (unspec:SVE_I
7214           [(match_operand:<VPRED> 1 "register_operand")
7215            (minus:SVE_I
7216              (match_operand:SVE_I 4 "register_operand")
7217              (mult:SVE_I
7218                (match_operand:SVE_I 2 "register_operand")
7219                (match_operand:SVE_I 3 "register_operand")))
7220            (match_dup 2)]
7221           UNSPEC_SEL))]
7222   "TARGET_SVE"
7223   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7224      [ w        , Upl , 0 , w , w ; *              ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7225      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7226   }
7227 )
7228
7229 ;; Predicated integer subtraction of product, merging with the third input.
7230 (define_insn "*cond_fnma<mode>_4"
7231   [(set (match_operand:SVE_I 0 "register_operand")
7232         (unspec:SVE_I
7233           [(match_operand:<VPRED> 1 "register_operand")
7234            (minus:SVE_I
7235              (match_operand:SVE_I 4 "register_operand")
7236              (mult:SVE_I
7237                (match_operand:SVE_I 2 "register_operand")
7238                (match_operand:SVE_I 3 "register_operand")))
7239            (match_dup 4)]
7240           UNSPEC_SEL))]
7241   "TARGET_SVE"
7242   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7243      [ w        , Upl , w , w , 0 ; *              ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7244      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7245   }
7246 )
7247
7248 ;; Predicated integer subtraction of product, merging with an
7249 ;; independent value.
7250 (define_insn_and_rewrite "*cond_fnma<mode>_any"
7251   [(set (match_operand:SVE_I 0 "register_operand")
7252         (unspec:SVE_I
7253           [(match_operand:<VPRED> 1 "register_operand")
7254            (minus:SVE_I
7255              (match_operand:SVE_I 4 "register_operand")
7256              (mult:SVE_I
7257                (match_operand:SVE_I 2 "register_operand")
7258                (match_operand:SVE_I 3 "register_operand")))
7259            (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7260           UNSPEC_SEL))]
7261   "TARGET_SVE
7262    && !rtx_equal_p (operands[2], operands[5])
7263    && !rtx_equal_p (operands[3], operands[5])
7264    && !rtx_equal_p (operands[4], operands[5])"
7265   {@ [ cons: =0 , 1   , 2 , 3 , 4 , 5   ]
7266      [ &w       , Upl , w , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7267      [ &w       , Upl , w , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7268      [ &w       , Upl , 0 , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7269      [ &w       , Upl , w , 0 , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7270      [ &w       , Upl , w , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7271      [ ?&w      , Upl , w , w , w , w   ] #
7272   }
7273   "&& reload_completed
7274    && register_operand (operands[5], <MODE>mode)
7275    && !rtx_equal_p (operands[0], operands[5])"
7276   {
7277     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7278                                              operands[5], operands[1]));
7279     operands[5] = operands[4] = operands[0];
7280   }
7281   [(set_attr "movprfx" "yes")]
7282 )
7283
7284 ;; -------------------------------------------------------------------------
7285 ;; ---- [INT] Dot product
7286 ;; -------------------------------------------------------------------------
7287 ;; Includes:
7288 ;; - SDOT
7289 ;; - SUDOT   (I8MM)
7290 ;; - UDOT
7291 ;; - USDOT   (I8MM)
7292 ;; -------------------------------------------------------------------------
7293
7294 ;; Four-element integer dot-product with accumulation.
7295 (define_insn "<sur>dot_prod<mode><vsi2qi>"
7296   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
7297         (plus:SVE_FULL_SDI
7298           (unspec:SVE_FULL_SDI
7299             [(match_operand:<VSI2QI> 1 "register_operand")
7300              (match_operand:<VSI2QI> 2 "register_operand")]
7301             DOTPROD)
7302           (match_operand:SVE_FULL_SDI 3 "register_operand")))]
7303   "TARGET_SVE"
7304   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7305      [ w        , w , w , 0 ; *              ] <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
7306      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %3\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
7307   }
7308 )
7309
7310 ;; Four-element integer dot-product by selected lanes with accumulation.
7311 (define_insn "@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>"
7312   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
7313         (plus:SVE_FULL_SDI
7314           (unspec:SVE_FULL_SDI
7315             [(match_operand:SVE_FULL_BHI 1 "register_operand")
7316              (unspec:SVE_FULL_BHI
7317                [(match_operand:SVE_FULL_BHI 2 "register_operand")
7318                 (match_operand:SI 3 "const_int_operand")]
7319                UNSPEC_SVE_LANE_SELECT)]
7320             DOTPROD)
7321           (match_operand:SVE_FULL_SDI 4 "register_operand")))]
7322   "TARGET_SVE
7323    && (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4
7324        || (TARGET_SVE2p1_OR_SME2
7325            && <SVE_FULL_SDI:elem_bits> == 32
7326            && <SVE_FULL_BHI:elem_bits> == 16))"
7327   {@ [ cons: =0 , 1 , 2                           , 4 ; attrs: movprfx ]
7328      [ w        , w , <SVE_FULL_SDI:sve_lane_con> , 0 ; *              ] <sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
7329      [ ?&w      , w , <SVE_FULL_SDI:sve_lane_con> , w ; yes            ] movprfx\t%0, %4\;<sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
7330   }
7331 )
7332
7333 (define_insn "@<sur>dot_prod<mode><vsi2qi>"
7334   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7335         (plus:VNx4SI_ONLY
7336           (unspec:VNx4SI_ONLY
7337             [(match_operand:<VSI2QI> 1 "register_operand")
7338              (match_operand:<VSI2QI> 2 "register_operand")]
7339             DOTPROD_US_ONLY)
7340           (match_operand:VNx4SI_ONLY 3 "register_operand")))]
7341   "TARGET_SVE_I8MM"
7342   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7343      [ w        , w , w , 0 ; *              ] <sur>dot\t%0.s, %1.b, %2.b
7344      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.b, %2.b
7345   }
7346 )
7347
7348 (define_insn "@aarch64_<sur>dot_prod_lane<VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
7349   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7350         (plus:VNx4SI_ONLY
7351           (unspec:VNx4SI_ONLY
7352             [(match_operand:VNx16QI_ONLY 1 "register_operand")
7353              (unspec:VNx16QI_ONLY
7354                [(match_operand:VNx16QI_ONLY 2 "register_operand")
7355                 (match_operand:SI 3 "const_int_operand")]
7356                UNSPEC_SVE_LANE_SELECT)]
7357             DOTPROD_I8MM)
7358           (match_operand:VNx4SI_ONLY 4 "register_operand")))]
7359   "TARGET_SVE_I8MM"
7360   {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7361      [ w        , w , y , 0 ; *              ] <sur>dot\t%0.s, %1.b, %2.b[%3]
7362      [ ?&w      , w , y , w ; yes            ] movprfx\t%0, %4\;<sur>dot\t%0.s, %1.b, %2.b[%3]
7363   }
7364 )
7365
7366 ;; -------------------------------------------------------------------------
7367 ;; ---- [INT] Sum of absolute differences
7368 ;; -------------------------------------------------------------------------
7369 ;; The patterns in this section are synthetic.
7370 ;; -------------------------------------------------------------------------
7371
7372 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
7373 ;; operands 1 and 2.  The sequence also has to perform a widening reduction of
7374 ;; the difference into a vector and accumulate that into operand 3 before
7375 ;; copying that into the result operand 0.
7376 ;; Perform that with a sequence of:
7377 ;; MOV          ones.b, #1
7378 ;; [SU]ABD      diff.b, p0/m, op1.b, op2.b
7379 ;; MOVPRFX      op0, op3        // If necessary
7380 ;; UDOT         op0.s, diff.b, ones.b
7381 (define_expand "<su>sad<vsi2qi>"
7382   [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
7383    (USMAX:<VSI2QI> (match_operand:<VSI2QI> 1 "register_operand")
7384                    (match_operand:<VSI2QI> 2 "register_operand"))
7385    (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
7386   "TARGET_SVE"
7387   {
7388     rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
7389     rtx diff = gen_reg_rtx (<VSI2QI>mode);
7390     emit_insn (gen_<su>abd<vsi2qi>3 (diff, operands[1], operands[2]));
7391     emit_insn (gen_udot_prod<mode><vsi2qi> (operands[0], diff, ones,
7392                                             operands[3]));
7393     DONE;
7394   }
7395 )
7396
7397 ;; -------------------------------------------------------------------------
7398 ;; ---- [INT] Matrix multiply-accumulate
7399 ;; -------------------------------------------------------------------------
7400 ;; Includes:
7401 ;; - SMMLA (I8MM)
7402 ;; - UMMLA (I8MM)
7403 ;; - USMMLA (I8MM)
7404 ;; -------------------------------------------------------------------------
7405
7406 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
7407   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7408         (plus:VNx4SI_ONLY
7409           (unspec:VNx4SI_ONLY
7410             [(match_operand:<VSI2QI> 2 "register_operand")
7411              (match_operand:<VSI2QI> 3 "register_operand")]
7412             MATMUL)
7413           (match_operand:VNx4SI_ONLY 1 "register_operand")))]
7414   "TARGET_SVE_I8MM && TARGET_NON_STREAMING"
7415   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7416      [ w        , 0 , w , w ; *              ] <sur>mmla\t%0.s, %2.b, %3.b
7417      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sur>mmla\t%0.s, %2.b, %3.b
7418   }
7419 )
7420
7421 ;; -------------------------------------------------------------------------
7422 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
7423 ;; -------------------------------------------------------------------------
7424 ;; Includes merging patterns for:
7425 ;; - FMAD
7426 ;; - FMLA
7427 ;; - FMLS
7428 ;; - FMSB
7429 ;; - FNMAD
7430 ;; - FNMLA
7431 ;; - FNMLS
7432 ;; - FNMSB
7433 ;; -------------------------------------------------------------------------
7434
7435 ;; Unpredicated floating-point ternary operations.
7436 (define_expand "<optab><mode>4"
7437   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7438         (unspec:SVE_FULL_F_BF
7439           [(match_dup 4)
7440            (const_int SVE_RELAXED_GP)
7441            (match_operand:SVE_FULL_F_BF 1 "register_operand")
7442            (match_operand:SVE_FULL_F_BF 2 "register_operand")
7443            (match_operand:SVE_FULL_F_BF 3 "register_operand")]
7444           SVE_COND_FP_TERNARY))]
7445   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7446   {
7447     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7448   }
7449 )
7450
7451 ;; Predicated floating-point ternary operations.
7452 (define_insn "@aarch64_pred_<optab><mode>"
7453   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7454         (unspec:SVE_FULL_F_BF
7455           [(match_operand:<VPRED> 1 "register_operand")
7456            (match_operand:SI 5 "aarch64_sve_gp_strictness")
7457            (match_operand:SVE_FULL_F_BF 2 "register_operand")
7458            (match_operand:SVE_FULL_F_BF 3 "register_operand")
7459            (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7460           SVE_COND_FP_TERNARY))]
7461   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7462   {@ [ cons: =0 , 1   , 2  , 3 , 4 ; attrs: movprfx , is_rev ]
7463      [ w        , Upl , %w , w , 0 ; *   , *    ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7464      [ w        , Upl , 0  , w , w ; *   , true ] <b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7465      [ ?&w      , Upl , w  , w , w ; yes , *    ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7466   }
7467   [(set_attr "is_bf16" "<is_bf16>")
7468    (set_attr "supports_bf16_rev" "false")]
7469 )
7470
7471 ;; Predicated floating-point ternary operations with merging.
7472 (define_expand "@cond_<optab><mode>"
7473   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7474         (unspec:SVE_FULL_F_BF
7475           [(match_operand:<VPRED> 1 "register_operand")
7476            (unspec:SVE_FULL_F_BF
7477              [(match_dup 1)
7478               (const_int SVE_STRICT_GP)
7479               (match_operand:SVE_FULL_F_BF 2 "register_operand")
7480               (match_operand:SVE_FULL_F_BF 3 "register_operand")
7481               (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7482              SVE_COND_FP_TERNARY)
7483            (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
7484           UNSPEC_SEL))]
7485   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7486 {
7487   /* Swap the multiplication operands if the fallback value is the
7488      second of the two.  */
7489   if (rtx_equal_p (operands[3], operands[5]))
7490     std::swap (operands[2], operands[3]);
7491 })
7492
7493 ;; Predicated floating-point ternary operations, merging with the
7494 ;; first input.
7495 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
7496   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7497         (unspec:SVE_FULL_F
7498           [(match_operand:<VPRED> 1 "register_operand")
7499            (unspec:SVE_FULL_F
7500              [(match_operand 5)
7501               (const_int SVE_RELAXED_GP)
7502               (match_operand:SVE_FULL_F 2 "register_operand")
7503               (match_operand:SVE_FULL_F 3 "register_operand")
7504               (match_operand:SVE_FULL_F 4 "register_operand")]
7505              SVE_COND_FP_TERNARY)
7506            (match_dup 2)]
7507           UNSPEC_SEL))]
7508   "TARGET_SVE"
7509   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7510      [ w        , Upl , 0 , w , w ; *              ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7511      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7512   }
7513   "&& !rtx_equal_p (operands[1], operands[5])"
7514   {
7515     operands[5] = copy_rtx (operands[1]);
7516   }
7517 )
7518
7519 (define_insn "*cond_<optab><mode>_2_strict"
7520   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7521         (unspec:SVE_FULL_F
7522           [(match_operand:<VPRED> 1 "register_operand")
7523            (unspec:SVE_FULL_F
7524              [(match_dup 1)
7525               (const_int SVE_STRICT_GP)
7526               (match_operand:SVE_FULL_F 2 "register_operand")
7527               (match_operand:SVE_FULL_F 3 "register_operand")
7528               (match_operand:SVE_FULL_F 4 "register_operand")]
7529              SVE_COND_FP_TERNARY)
7530            (match_dup 2)]
7531           UNSPEC_SEL))]
7532   "TARGET_SVE"
7533   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7534      [ w        , Upl , 0 , w , w ; *              ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7535      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7536   }
7537 )
7538
7539 ;; Predicated floating-point ternary operations, merging with the
7540 ;; third input.
7541 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7542   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7543         (unspec:SVE_FULL_F_BF
7544           [(match_operand:<VPRED> 1 "register_operand")
7545            (unspec:SVE_FULL_F_BF
7546              [(match_operand 5)
7547               (const_int SVE_RELAXED_GP)
7548               (match_operand:SVE_FULL_F_BF 2 "register_operand")
7549               (match_operand:SVE_FULL_F_BF 3 "register_operand")
7550               (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7551              SVE_COND_FP_TERNARY)
7552            (match_dup 4)]
7553           UNSPEC_SEL))]
7554   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7555   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7556      [ w        , Upl , w , w , 0 ; *              ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7557      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7558   }
7559   "&& !rtx_equal_p (operands[1], operands[5])"
7560   {
7561     operands[5] = copy_rtx (operands[1]);
7562   }
7563 )
7564
7565 (define_insn "*cond_<optab><mode>_4_strict"
7566   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7567         (unspec:SVE_FULL_F_BF
7568           [(match_operand:<VPRED> 1 "register_operand")
7569            (unspec:SVE_FULL_F_BF
7570              [(match_dup 1)
7571               (const_int SVE_STRICT_GP)
7572               (match_operand:SVE_FULL_F_BF 2 "register_operand")
7573               (match_operand:SVE_FULL_F_BF 3 "register_operand")
7574               (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7575              SVE_COND_FP_TERNARY)
7576            (match_dup 4)]
7577           UNSPEC_SEL))]
7578   "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7579   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7580      [ w        , Upl , w , w , 0 ; *              ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7581      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7582   }
7583 )
7584
7585 ;; Predicated floating-point ternary operations, merging with an
7586 ;; independent value.
7587 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7588   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7589         (unspec:SVE_FULL_F_BF
7590           [(match_operand:<VPRED> 1 "register_operand")
7591            (unspec:SVE_FULL_F_BF
7592              [(match_operand 6)
7593               (const_int SVE_RELAXED_GP)
7594               (match_operand:SVE_FULL_F_BF 2 "register_operand")
7595               (match_operand:SVE_FULL_F_BF 3 "register_operand")
7596               (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7597              SVE_COND_FP_TERNARY)
7598            (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
7599           UNSPEC_SEL))]
7600   "TARGET_SVE
7601    && (<supports_bf16> || !<is_bf16>)
7602    && (<is_bf16> || !rtx_equal_p (operands[2], operands[5]))
7603    && (<is_bf16> || !rtx_equal_p (operands[3], operands[5]))
7604    && !rtx_equal_p (operands[4], operands[5])"
7605   {@ [ cons: =0 , 1   , 2 , 3 , 4 , 5  ; attrs: is_rev ]
7606      [ &w       , Upl , w , w , w , Dz ; *    ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7607      [ &w       , Upl , w , w , 0 , Dz ; *    ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7608      [ &w       , Upl , 0 , w , w , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7609      [ &w       , Upl , w , 0 , w , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7610      [ &w       , Upl , w , w , w , 0  ; *    ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7611      [ ?&w      , Upl , w , w , w , w  ; *    ] #
7612   }
7613   "&& 1"
7614   {
7615     if (reload_completed
7616         && register_operand (operands[5], <MODE>mode)
7617         && !rtx_equal_p (operands[0], operands[5]))
7618       {
7619         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7620                                                  operands[5], operands[1]));
7621         operands[5] = operands[4] = operands[0];
7622       }
7623     else if (!rtx_equal_p (operands[1], operands[6]))
7624       operands[6] = copy_rtx (operands[1]);
7625     else
7626       FAIL;
7627   }
7628   [(set_attr "movprfx" "yes")
7629    (set_attr "is_bf16" "<is_bf16>")
7630    (set_attr "supports_bf16_rev" "false")]
7631 )
7632
7633 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7634   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7635         (unspec:SVE_FULL_F_BF
7636           [(match_operand:<VPRED> 1 "register_operand")
7637            (unspec:SVE_FULL_F_BF
7638              [(match_dup 1)
7639               (const_int SVE_STRICT_GP)
7640               (match_operand:SVE_FULL_F_BF 2 "register_operand")
7641               (match_operand:SVE_FULL_F_BF 3 "register_operand")
7642               (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7643              SVE_COND_FP_TERNARY)
7644            (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
7645           UNSPEC_SEL))]
7646   "TARGET_SVE
7647    && (<supports_bf16> || !<is_bf16>)
7648    && (<is_bf16> || !rtx_equal_p (operands[2], operands[5]))
7649    && (<is_bf16> || !rtx_equal_p (operands[3], operands[5]))
7650    && !rtx_equal_p (operands[4], operands[5])"
7651   {@ [ cons: =0 , 1   , 2 , 3 , 4 , 5  ; attrs: is_rev ]
7652      [ &w       , Upl , w , w , w , Dz ; *    ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7653      [ &w       , Upl , w , w , 0 , Dz ; *    ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7654      [ &w       , Upl , 0 , w , w , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7655      [ &w       , Upl , w , 0 , w , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7656      [ &w       , Upl , w , w , w , 0  ; *    ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7657      [ ?&w      , Upl , w , w , w , w  ; *    ] #
7658   }
7659   "&& reload_completed
7660    && register_operand (operands[5], <MODE>mode)
7661    && !rtx_equal_p (operands[0], operands[5])"
7662   {
7663     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7664                                              operands[5], operands[1]));
7665     operands[5] = operands[4] = operands[0];
7666   }
7667   [(set_attr "movprfx" "yes")
7668    (set_attr "is_bf16" "<is_bf16>")
7669    (set_attr "supports_bf16_rev" "false")]
7670 )
7671
7672 ;; Unpredicated FMLA and FMLS by selected lanes.  It doesn't seem worth using
7673 ;; (fma ...) since target-independent code won't understand the indexing.
7674 (define_insn "@aarch64_<optab>_lane_<mode>"
7675   [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7676         (unspec:SVE_FULL_F_BF
7677           [(match_operand:SVE_FULL_F_BF 1 "register_operand")
7678            (unspec:SVE_FULL_F_BF
7679              [(match_operand:SVE_FULL_F_BF 2 "register_operand")
7680               (match_operand:SI 3 "const_int_operand")]
7681              UNSPEC_SVE_LANE_SELECT)
7682            (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7683           SVE_FP_TERNARY_LANE))]
7684   "TARGET_SVE"
7685   {@ [ cons: =0 , 1 , 2              , 4 ; attrs: movprfx ]
7686      [ w        , w , <sve_lane_con> , 0 ; *              ] <b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7687      [ ?&w      , w , <sve_lane_con> , w ; yes            ] movprfx\t%0, %4\;<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7688   }
7689 )
7690
7691 ;; -------------------------------------------------------------------------
7692 ;; ---- [FP] Complex multiply-add
7693 ;; -------------------------------------------------------------------------
7694 ;; Includes merging patterns for:
7695 ;; - FCMLA
7696 ;; -------------------------------------------------------------------------
7697
7698 ;; Predicated FCMLA.
7699 (define_insn "@aarch64_pred_<optab><mode>"
7700   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7701         (unspec:SVE_FULL_F
7702           [(match_operand:<VPRED> 1 "register_operand")
7703            (match_operand:SI 5 "aarch64_sve_gp_strictness")
7704            (match_operand:SVE_FULL_F 2 "register_operand")
7705            (match_operand:SVE_FULL_F 3 "register_operand")
7706            (match_operand:SVE_FULL_F 4 "register_operand")]
7707           SVE_COND_FCMLA))]
7708   "TARGET_SVE"
7709   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7710      [ w        , Upl , w , w , 0 ; *              ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7711      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7712   }
7713 )
7714
7715 ;; unpredicated optab pattern for auto-vectorizer
7716 ;; The complex mla/mls operations always need to expand to two instructions.
7717 ;; The first operation does half the computation and the second does the
7718 ;; remainder.  Because of this, expand early.
7719 (define_expand "cml<fcmac1><conj_op><mode>4"
7720   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7721         (unspec:SVE_FULL_F
7722           [(match_dup 4)
7723            (match_dup 5)
7724            (match_operand:SVE_FULL_F 1 "register_operand")
7725            (match_operand:SVE_FULL_F 2 "register_operand")
7726            (match_operand:SVE_FULL_F 3 "register_operand")]
7727           FCMLA_OP))]
7728   "TARGET_SVE"
7729 {
7730   operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7731   operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode);
7732   rtx tmp = gen_reg_rtx (<MODE>mode);
7733   emit_insn
7734     (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4],
7735                                              operands[2], operands[1],
7736                                              operands[3], operands[5]));
7737   emit_insn
7738     (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4],
7739                                              operands[2], operands[1],
7740                                              tmp, operands[5]));
7741   DONE;
7742 })
7743
7744 ;; unpredicated optab pattern for auto-vectorizer
7745 ;; The complex mul operations always need to expand to two instructions.
7746 ;; The first operation does half the computation and the second does the
7747 ;; remainder.  Because of this, expand early.
7748 (define_expand "cmul<conj_op><mode>3"
7749   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7750         (unspec:SVE_FULL_F
7751            [(match_operand:SVE_FULL_F 1 "register_operand")
7752             (match_operand:SVE_FULL_F 2 "register_operand")]
7753           FCMUL_OP))]
7754   "TARGET_SVE"
7755 {
7756   rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode);
7757   rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode);
7758   rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
7759   rtx tmp = gen_reg_rtx (<MODE>mode);
7760   emit_insn
7761     (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg,
7762                                              operands[2], operands[1],
7763                                              accum, gp_mode));
7764   emit_insn
7765     (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg,
7766                                              operands[2], operands[1],
7767                                              tmp, gp_mode));
7768   DONE;
7769 })
7770
7771 ;; Predicated FCMLA with merging.
7772 (define_expand "@cond_<optab><mode>"
7773   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7774         (unspec:SVE_FULL_F
7775           [(match_operand:<VPRED> 1 "register_operand")
7776            (unspec:SVE_FULL_F
7777              [(match_dup 1)
7778               (const_int SVE_STRICT_GP)
7779               (match_operand:SVE_FULL_F 2 "register_operand")
7780               (match_operand:SVE_FULL_F 3 "register_operand")
7781               (match_operand:SVE_FULL_F 4 "register_operand")]
7782              SVE_COND_FCMLA)
7783            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7784           UNSPEC_SEL))]
7785   "TARGET_SVE"
7786 )
7787
7788 ;; Predicated FCMLA, merging with the third input.
7789 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7790   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7791         (unspec:SVE_FULL_F
7792           [(match_operand:<VPRED> 1 "register_operand")
7793            (unspec:SVE_FULL_F
7794              [(match_operand 5)
7795               (const_int SVE_RELAXED_GP)
7796               (match_operand:SVE_FULL_F 2 "register_operand")
7797               (match_operand:SVE_FULL_F 3 "register_operand")
7798               (match_operand:SVE_FULL_F 4 "register_operand")]
7799              SVE_COND_FCMLA)
7800            (match_dup 4)]
7801           UNSPEC_SEL))]
7802   "TARGET_SVE"
7803   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7804      [ w        , Upl , w , w , 0 ; *              ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7805      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7806   }
7807   "&& !rtx_equal_p (operands[1], operands[5])"
7808   {
7809     operands[5] = copy_rtx (operands[1]);
7810   }
7811 )
7812
7813 (define_insn "*cond_<optab><mode>_4_strict"
7814   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7815         (unspec:SVE_FULL_F
7816           [(match_operand:<VPRED> 1 "register_operand")
7817            (unspec:SVE_FULL_F
7818              [(match_dup 1)
7819               (const_int SVE_STRICT_GP)
7820               (match_operand:SVE_FULL_F 2 "register_operand")
7821               (match_operand:SVE_FULL_F 3 "register_operand")
7822               (match_operand:SVE_FULL_F 4 "register_operand")]
7823              SVE_COND_FCMLA)
7824            (match_dup 4)]
7825           UNSPEC_SEL))]
7826   "TARGET_SVE"
7827   {@ [ cons: =0 , 1   , 2 , 3 , 4 ; attrs: movprfx ]
7828      [ w        , Upl , w , w , 0 ; *              ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7829      [ ?&w      , Upl , w , w , w ; yes            ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7830   }
7831 )
7832
7833 ;; Predicated FCMLA, merging with an independent value.
7834 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7835   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7836         (unspec:SVE_FULL_F
7837           [(match_operand:<VPRED> 1 "register_operand")
7838            (unspec:SVE_FULL_F
7839              [(match_operand 6)
7840               (const_int SVE_RELAXED_GP)
7841               (match_operand:SVE_FULL_F 2 "register_operand")
7842               (match_operand:SVE_FULL_F 3 "register_operand")
7843               (match_operand:SVE_FULL_F 4 "register_operand")]
7844              SVE_COND_FCMLA)
7845            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7846           UNSPEC_SEL))]
7847   "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7848   {@ [ cons: =0 , 1   , 2 , 3 , 4 , 5   ]
7849      [ &w       , Upl , w , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7850      [ &w       , Upl , w , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7851      [ &w       , Upl , w , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7852      [ ?&w      , Upl , w , w , w , w   ] #
7853   }
7854   "&& 1"
7855   {
7856     if (reload_completed
7857         && register_operand (operands[5], <MODE>mode)
7858         && !rtx_equal_p (operands[0], operands[5]))
7859       {
7860         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7861                                                  operands[5], operands[1]));
7862         operands[5] = operands[4] = operands[0];
7863       }
7864     else if (!rtx_equal_p (operands[1], operands[6]))
7865       operands[6] = copy_rtx (operands[1]);
7866     else
7867       FAIL;
7868   }
7869   [(set_attr "movprfx" "yes")]
7870 )
7871
7872 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7873   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7874         (unspec:SVE_FULL_F
7875           [(match_operand:<VPRED> 1 "register_operand")
7876            (unspec:SVE_FULL_F
7877              [(match_dup 1)
7878               (const_int SVE_STRICT_GP)
7879               (match_operand:SVE_FULL_F 2 "register_operand")
7880               (match_operand:SVE_FULL_F 3 "register_operand")
7881               (match_operand:SVE_FULL_F 4 "register_operand")]
7882              SVE_COND_FCMLA)
7883            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7884           UNSPEC_SEL))]
7885   "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7886   {@ [ cons: =0 , 1   , 2 , 3 , 4 , 5   ]
7887      [ &w       , Upl , w , w , w , Dz  ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7888      [ &w       , Upl , w , w , 0 , Dz  ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7889      [ &w       , Upl , w , w , w , 0   ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7890      [ ?&w      , Upl , w , w , w , w   ] #
7891   }
7892   "&& reload_completed
7893    && register_operand (operands[5], <MODE>mode)
7894    && !rtx_equal_p (operands[0], operands[5])"
7895   {
7896     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7897                                              operands[5], operands[1]));
7898     operands[5] = operands[4] = operands[0];
7899   }
7900   [(set_attr "movprfx" "yes")]
7901 )
7902
7903 ;; Unpredicated FCMLA with indexing.
7904 (define_insn "@aarch64_<optab>_lane_<mode>"
7905   [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
7906         (unspec:SVE_FULL_HSF
7907           [(match_operand:SVE_FULL_HSF 1 "register_operand")
7908            (unspec:SVE_FULL_HSF
7909              [(match_operand:SVE_FULL_HSF 2 "register_operand")
7910               (match_operand:SI 3 "const_int_operand")]
7911              UNSPEC_SVE_LANE_SELECT)
7912            (match_operand:SVE_FULL_HSF 4 "register_operand")]
7913           FCMLA))]
7914   "TARGET_SVE"
7915   {@ [ cons: =0 , 1 , 2                   , 4 ; attrs: movprfx ]
7916      [ w        , w , <sve_lane_pair_con> , 0 ; *              ] fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7917      [ ?&w      , w , <sve_lane_pair_con> , w ; yes            ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7918   }
7919 )
7920
7921 ;; -------------------------------------------------------------------------
7922 ;; ---- [FP] Trigonometric multiply-add
7923 ;; -------------------------------------------------------------------------
7924 ;; Includes:
7925 ;; - FTMAD
7926 ;; -------------------------------------------------------------------------
7927
7928 (define_insn "@aarch64_sve_tmad<mode>"
7929   [(set (match_operand:SVE_FULL_F 0 "register_operand")
7930         (unspec:SVE_FULL_F
7931           [(match_operand:SVE_FULL_F 1 "register_operand")
7932            (match_operand:SVE_FULL_F 2 "register_operand")
7933            (match_operand:DI 3 "const_int_operand")]
7934           UNSPEC_FTMAD))]
7935   "TARGET_SVE"
7936   {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
7937      [ w        , 0 , w ; *              ] ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7938      [ ?&w      , w , w ; yes            ] movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7939   }
7940 )
7941
7942 ;; -------------------------------------------------------------------------
7943 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
7944 ;; -------------------------------------------------------------------------
7945 ;; Includes:
7946 ;; - BFDOT (BF16)
7947 ;; - BFMLALB (BF16)
7948 ;; - BFMLALT (BF16)
7949 ;; - BFMLSLB (SVE2p1, SME2)
7950 ;; - BFMLSLT (SVE2p1, SME2)
7951 ;; - BFMMLA (BF16)
7952 ;; -------------------------------------------------------------------------
7953
7954 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
7955   [(set (match_operand:VNx4SF 0 "register_operand")
7956         (unspec:VNx4SF
7957           [(match_operand:VNx4SF 1 "register_operand")
7958            (match_operand:VNx8BF 2 "register_operand")
7959            (match_operand:VNx8BF 3 "register_operand")]
7960           SVE_BFLOAT_TERNARY_LONG))]
7961   ""
7962   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7963      [ w        , 0 , w , w ; *              ] <sve_fp_op>\t%0.s, %2.h, %3.h
7964      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h
7965   }
7966 )
7967
7968 ;; The immediate range is enforced before generating the instruction.
7969 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
7970   [(set (match_operand:VNx4SF 0 "register_operand")
7971         (unspec:VNx4SF
7972           [(match_operand:VNx4SF 1 "register_operand")
7973            (match_operand:VNx8BF 2 "register_operand")
7974            (match_operand:VNx8BF 3 "register_operand")
7975            (match_operand:SI 4 "const_int_operand")]
7976           SVE_BFLOAT_TERNARY_LONG_LANE))]
7977   ""
7978   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7979      [ w        , 0 , w , y ; *              ] <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7980      [ ?&w      , w , w , y ; yes            ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7981   }
7982 )
7983
7984 ;; -------------------------------------------------------------------------
7985 ;; ---- [FP] Matrix multiply-accumulate
7986 ;; -------------------------------------------------------------------------
7987 ;; Includes:
7988 ;; - FMMLA (F32MM,F64MM)
7989 ;; -------------------------------------------------------------------------
7990
7991 ;; The mode iterator enforces the target requirements.
7992 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
7993   [(set (match_operand:SVE_MATMULF 0 "register_operand")
7994         (unspec:SVE_MATMULF
7995           [(match_operand:SVE_MATMULF 2 "register_operand")
7996            (match_operand:SVE_MATMULF 3 "register_operand")
7997            (match_operand:SVE_MATMULF 1 "register_operand")]
7998           FMMLA))]
7999   "TARGET_SVE && TARGET_NON_STREAMING"
8000   {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
8001      [ w        , 0 , w , w ; *              ] <sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
8002      [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
8003   }
8004 )
8005
8006 ;; =========================================================================
8007 ;; == Comparisons and selects
8008 ;; =========================================================================
8009
8010 ;; -------------------------------------------------------------------------
8011 ;; ---- [INT,FP] Select based on predicates
8012 ;; -------------------------------------------------------------------------
8013 ;; Includes merging patterns for:
8014 ;; - FMOV
8015 ;; - MOV
8016 ;; - SEL
8017 ;; -------------------------------------------------------------------------
8018
8019 ;; vcond_mask operand order: true, false, mask
8020 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
8021 ;; SEL operand order:        mask, true, false
8022 (define_expand "@vcond_mask_<mode><vpred>"
8023   [(set (match_operand:SVE_ALL 0 "register_operand")
8024         (unspec:SVE_ALL
8025           [(match_operand:<VPRED> 3 "register_operand")
8026            (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
8027            (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
8028           UNSPEC_SEL))]
8029   "TARGET_SVE"
8030   {
8031     if (register_operand (operands[1], <MODE>mode))
8032       operands[2] = force_reg (<MODE>mode, operands[2]);
8033   }
8034 )
8035
8036 ;; Selects between:
8037 ;; - two registers
8038 ;; - a duplicated immediate and a register
8039 ;; - a duplicated immediate and zero
8040 ;;
8041 ;; For unpacked vectors, it doesn't really matter whether SEL uses the
8042 ;; the container size or the element size.  If SEL used the container size,
8043 ;; it would ignore undefined bits of the predicate but would copy the
8044 ;; upper (undefined) bits of each container along with the defined bits.
8045 ;; If SEL used the element size, it would use undefined bits of the predicate
8046 ;; to select between undefined elements in each input vector.  Thus the only
8047 ;; difference is whether the undefined bits in a container always come from
8048 ;; the same input as the defined bits, or whether the choice can vary
8049 ;; independently of the defined bits.
8050 ;;
8051 ;; For the other instructions, using the element size is more natural,
8052 ;; so we do that for SEL as well.
8053 (define_insn "*vcond_mask_<mode><vpred>"
8054   [(set (match_operand:SVE_ALL 0 "register_operand")
8055         (unspec:SVE_ALL
8056           [(match_operand:<VPRED> 3 "register_operand")
8057            (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
8058            (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
8059           UNSPEC_SEL))]
8060   "TARGET_SVE
8061    && (!register_operand (operands[1], <MODE>mode)
8062        || register_operand (operands[2], <MODE>mode))"
8063   {@ [ cons: =0 , 1   , 2  , 3   ; attrs: movprfx ]
8064      [ w        , w   , w  , Upa ; *              ] sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
8065      [ w        , vss , 0  , Upa ; *              ] mov\t%0.<Vetype>, %3/m, #%I1
8066      [ w        , vss , Dz , Upa ; *              ] mov\t%0.<Vetype>, %3/z, #%I1
8067      [ w        , Ufc , 0  , Upa ; *              ] fmov\t%0.<Vetype>, %3/m, #%1
8068      [ ?w       , Ufc , Dz , Upl ; yes            ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
8069      [ ?&w      , vss , w  , Upa ; yes            ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
8070      [ ?&w      , Ufc , w  , Upa ; yes            ] movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1
8071   }
8072 )
8073
8074 ;; Optimize selects between a duplicated scalar variable and another vector,
8075 ;; the latter of which can be a zero constant or a variable.  Treat duplicates
8076 ;; of GPRs as being more expensive than duplicates of FPRs, since they
8077 ;; involve a cross-file move.
8078 (define_insn "@aarch64_sel_dup<mode>"
8079   [(set (match_operand:SVE_ALL 0 "register_operand")
8080         (unspec:SVE_ALL
8081           [(match_operand:<VPRED> 3 "register_operand")
8082            (vec_duplicate:SVE_ALL
8083              (match_operand:<VEL> 1 "register_operand"))
8084            (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
8085           UNSPEC_SEL))]
8086   "TARGET_SVE"
8087   {@ [ cons: =0 , 1 , 2  , 3   ; attrs: movprfx ]
8088      [ ?w       , r , 0  , Upl ; *              ] mov\t%0.<Vetype>, %3/m, %<vwcore>1
8089      [ w        , w , 0  , Upl ; *              ] mov\t%0.<Vetype>, %3/m, %<Vetype>1
8090      [ ??w      , r , Dz , Upl ; yes            ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
8091      [ ?&w      , w , Dz , Upl ; yes            ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
8092      [ ??&w     , r , w  , Upl ; yes            ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
8093      [ ?&w      , w , w  , Upl ; yes            ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
8094   }
8095 )
8096
8097 ;; -------------------------------------------------------------------------
8098 ;; ---- [INT] Comparisons
8099 ;; -------------------------------------------------------------------------
8100 ;; Includes:
8101 ;; - CMPEQ
8102 ;; - CMPGE
8103 ;; - CMPGT
8104 ;; - CMPHI
8105 ;; - CMPHS
8106 ;; - CMPLE
8107 ;; - CMPLO
8108 ;; - CMPLS
8109 ;; - CMPLT
8110 ;; - CMPNE
8111 ;; -------------------------------------------------------------------------
8112
8113 ;; Signed integer comparisons.  Don't enforce an immediate range here, since
8114 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
8115 ;; instead.
8116 (define_expand "vec_cmp<mode><vpred>"
8117   [(parallel
8118     [(set (match_operand:<VPRED> 0 "register_operand")
8119           (match_operator:<VPRED> 1 "comparison_operator"
8120             [(match_operand:SVE_I 2 "register_operand")
8121              (match_operand:SVE_I 3 "nonmemory_operand")]))
8122      (clobber (reg:CC_NZC CC_REGNUM))])]
8123   "TARGET_SVE"
8124   {
8125     aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
8126                                     operands[2], operands[3]);
8127     DONE;
8128   }
8129 )
8130
8131 ;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
8132 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
8133 ;; instead.
8134 (define_expand "vec_cmpu<mode><vpred>"
8135   [(parallel
8136     [(set (match_operand:<VPRED> 0 "register_operand")
8137           (match_operator:<VPRED> 1 "comparison_operator"
8138             [(match_operand:SVE_I 2 "register_operand")
8139              (match_operand:SVE_I 3 "nonmemory_operand")]))
8140      (clobber (reg:CC_NZC CC_REGNUM))])]
8141   "TARGET_SVE"
8142   {
8143     aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
8144                                     operands[2], operands[3]);
8145     DONE;
8146   }
8147 )
8148
8149 ;; Predicated integer comparisons.
8150 ;;
8151 ;; For unpacked vectors, only the lowpart element in each input container
8152 ;; has a defined value, and only the predicate bits associated with
8153 ;; those elements are defined.  For example, when comparing two VNx2SIs:
8154 ;;
8155 ;; - The VNx2SIs can be seem as VNx2DIs in which the low halves of each
8156 ;;   DI container store an SI element.  The upper bits of each DI container
8157 ;;   are undefined.
8158 ;;
8159 ;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
8160 ;;   even elements are defined and the odd elements are undefined.
8161 ;;
8162 ;; - The associated predicate mode is VNx2BI.  This means that only the
8163 ;;   low bit in each predicate byte is defined (on input and on output).
8164 ;;
8165 ;; - We use a .s comparison to compare VNx2SIs, under the control of a
8166 ;;   VNx2BI governing predicate, to produce a VNx2BI result.  If we view
8167 ;;   the .s operation as operating on VNx4SIs then for odd lanes:
8168 ;;
8169 ;;   - the input governing predicate bit is undefined
8170 ;;   - the SI elements being compared are undefined
8171 ;;   - the predicate result bit is therefore undefined, but
8172 ;;   - the predicate result bit is in the undefined part of a VNx2BI,
8173 ;;     so its value doesn't matter anyway.
8174 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
8175   [(set (match_operand:<VPRED> 0 "register_operand")
8176         (unspec:<VPRED>
8177           [(match_operand:<VPRED> 1 "register_operand")
8178            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8179            (SVE_INT_CMP:<VPRED>
8180              (match_operand:SVE_I 3 "register_operand")
8181              (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8182           UNSPEC_PRED_Z))
8183    (clobber (reg:CC_NZC CC_REGNUM))]
8184   "TARGET_SVE"
8185   {@ [ cons: =0 , 1  , 3 , 4            ; attrs: pred_clobber ]
8186      [ &Upa     , Upl, w , <sve_imm_con>; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
8187      [ ?Upl     , 0  , w , <sve_imm_con>; yes                 ] ^
8188      [ Upa      , Upl, w , <sve_imm_con>; no                  ] ^
8189      [ &Upa     , Upl, w , w            ; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
8190      [ ?Upl     , 0  , w , w            ; yes                 ] ^
8191      [ Upa      , Upl, w , w            ; no                  ] ^
8192   }
8193 )
8194
8195 ;; Predicated integer comparisons in which both the flag and predicate
8196 ;; results are interesting.
8197 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
8198   [(set (reg:CC_NZC CC_REGNUM)
8199         (unspec:CC_NZC
8200           [(match_operand:VNx16BI 1 "register_operand")
8201            (match_operand 4)
8202            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8203            (unspec:<VPRED>
8204              [(match_operand 6)
8205               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8206               (SVE_INT_CMP:<VPRED>
8207                 (match_operand:SVE_I 2 "register_operand")
8208                 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8209              UNSPEC_PRED_Z)]
8210           UNSPEC_PTEST))
8211    (set (match_operand:<VPRED> 0 "register_operand")
8212         (unspec:<VPRED>
8213           [(match_dup 6)
8214            (match_dup 7)
8215            (SVE_INT_CMP:<VPRED>
8216              (match_dup 2)
8217              (match_dup 3))]
8218           UNSPEC_PRED_Z))]
8219   "TARGET_SVE
8220    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8221   {@ [ cons: =0 , 1   , 2 , 3            ; attrs: pred_clobber ]
8222      [ &Upa     ,  Upl, w , <sve_imm_con>; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
8223      [ ?Upl     ,  0  , w , <sve_imm_con>; yes                 ] ^
8224      [ Upa      ,  Upl, w , <sve_imm_con>; no                  ] ^
8225      [ &Upa     ,  Upl, w , w            ; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
8226      [ ?Upl     ,  0  , w , w            ; yes                 ] ^
8227      [ Upa      ,  Upl, w , w            ; no                  ] ^
8228   }
8229   "&& !rtx_equal_p (operands[4], operands[6])"
8230   {
8231     operands[6] = copy_rtx (operands[4]);
8232     operands[7] = operands[5];
8233   }
8234 )
8235
8236 ;; Predicated integer comparisons in which only the flags result is
8237 ;; interesting.
8238 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
8239   [(set (reg:CC_NZC CC_REGNUM)
8240         (unspec:CC_NZC
8241           [(match_operand:VNx16BI 1 "register_operand")
8242            (match_operand 4)
8243            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8244            (unspec:<VPRED>
8245              [(match_operand 6)
8246               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8247               (SVE_INT_CMP:<VPRED>
8248                 (match_operand:SVE_I 2 "register_operand")
8249                 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8250              UNSPEC_PRED_Z)]
8251           UNSPEC_PTEST))
8252    (clobber (match_scratch:<VPRED> 0))]
8253   "TARGET_SVE
8254    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8255   {@ [ cons: =0, 1    , 2 , 3            ; attrs: pred_clobber ]
8256      [ &Upa    ,  Upl, w , <sve_imm_con>; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
8257      [ ?Upl    ,  0  , w , <sve_imm_con>; yes                 ] ^
8258      [ Upa     ,  Upl, w , <sve_imm_con>; no                  ] ^
8259      [ &Upa    ,  Upl, w , w            ; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
8260      [ ?Upl    ,  0  , w , w            ; yes                 ] ^
8261      [ Upa     ,  Upl, w , w            ; no                  ] ^
8262   }
8263   "&& !rtx_equal_p (operands[4], operands[6])"
8264   {
8265     operands[6] = copy_rtx (operands[4]);
8266     operands[7] = operands[5];
8267   }
8268 )
8269
8270 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
8271 ;; comparison with an AND.  Split the instruction into its preferred form
8272 ;; at the earliest opportunity, in order to get rid of the redundant
8273 ;; operand 4.
8274 (define_insn_and_split "*cmp<cmp_op><mode>_and"
8275   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8276         (and:<VPRED>
8277           (unspec:<VPRED>
8278             [(match_operand 4)
8279              (const_int SVE_KNOWN_PTRUE)
8280              (SVE_INT_CMP:<VPRED>
8281                (match_operand:SVE_I 2 "register_operand" "w, w")
8282                (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
8283             UNSPEC_PRED_Z)
8284           (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
8285    (clobber (reg:CC_NZC CC_REGNUM))]
8286   "TARGET_SVE"
8287   "#"
8288   "&& 1"
8289   [(parallel
8290      [(set (match_dup 0)
8291            (unspec:<VPRED>
8292              [(match_dup 1)
8293               (const_int SVE_MAYBE_NOT_PTRUE)
8294               (SVE_INT_CMP:<VPRED>
8295                 (match_dup 2)
8296                 (match_dup 3))]
8297              UNSPEC_PRED_Z))
8298       (clobber (reg:CC_NZC CC_REGNUM))])]
8299 )
8300
8301 ;; Predicated integer wide comparisons.
8302 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
8303   [(set (match_operand:<VPRED> 0 "register_operand")
8304         (unspec:<VPRED>
8305           [(match_operand:VNx16BI 1 "register_operand")
8306            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8307            (unspec:<VPRED>
8308              [(match_operand:SVE_FULL_BHSI 3 "register_operand")
8309               (match_operand:VNx2DI 4 "register_operand")]
8310              SVE_COND_INT_CMP_WIDE)]
8311           UNSPEC_PRED_Z))
8312    (clobber (reg:CC_NZC CC_REGNUM))]
8313   "TARGET_SVE"
8314   {@ [ cons: =0, 1   , 2, 3, 4; attrs: pred_clobber ]
8315      [ &Upa    ,  Upl,  , w, w; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d
8316      [ ?Upl    ,  0  ,  , w, w; yes                 ] ^
8317      [ Upa     ,  Upl,  , w, w; no                  ] ^
8318   }
8319 )
8320
8321 ;; Predicated integer wide comparisons in which both the flag and
8322 ;; predicate results are interesting.
8323 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
8324   [(set (reg:CC_NZC CC_REGNUM)
8325         (unspec:CC_NZC
8326           [(match_operand:VNx16BI 1 "register_operand")
8327            (match_operand 4)
8328            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8329            (unspec:<VPRED>
8330              [(match_operand:VNx16BI 6 "register_operand")
8331               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8332               (unspec:<VPRED>
8333                 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
8334                  (match_operand:VNx2DI 3 "register_operand")]
8335                 SVE_COND_INT_CMP_WIDE)]
8336              UNSPEC_PRED_Z)]
8337           UNSPEC_PTEST))
8338    (set (match_operand:<VPRED> 0 "register_operand")
8339         (unspec:<VPRED>
8340           [(match_dup 6)
8341            (match_dup 7)
8342            (unspec:<VPRED>
8343              [(match_dup 2)
8344               (match_dup 3)]
8345              SVE_COND_INT_CMP_WIDE)]
8346           UNSPEC_PRED_Z))]
8347   "TARGET_SVE
8348    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8349   {@ [ cons: =0, 1   , 2, 3, 6  ; attrs: pred_clobber ]
8350      [ &Upa    ,  Upl, w, w, Upl; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
8351      [ ?Upl    ,  0  , w, w, Upl; yes                 ] ^
8352      [ Upa     ,  Upl, w, w, Upl; no                  ] ^
8353   }
8354 )
8355
8356 ;; Predicated integer wide comparisons in which only the flags result
8357 ;; is interesting.
8358 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
8359   [(set (reg:CC_NZC CC_REGNUM)
8360         (unspec:CC_NZC
8361           [(match_operand:VNx16BI 1 "register_operand")
8362            (match_operand 4)
8363            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8364            (unspec:<VPRED>
8365              [(match_operand:VNx16BI 6 "register_operand")
8366               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8367               (unspec:<VPRED>
8368                 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
8369                  (match_operand:VNx2DI 3 "register_operand")]
8370                 SVE_COND_INT_CMP_WIDE)]
8371              UNSPEC_PRED_Z)]
8372           UNSPEC_PTEST))
8373    (clobber (match_scratch:<VPRED> 0))]
8374   "TARGET_SVE
8375    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8376   {@ [ cons:  =0, 1   , 2, 3, 6  ; attrs: pred_clobber ]
8377      [ &Upa     ,  Upl, w, w, Upl; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
8378      [ ?Upl     ,  0  , w, w, Upl; yes                 ] ^
8379      [ Upa      ,  Upl, w, w, Upl; no                  ] ^
8380   }
8381 )
8382
8383 ;; -------------------------------------------------------------------------
8384 ;; ---- [INT] While tests
8385 ;; -------------------------------------------------------------------------
8386 ;; Includes:
8387 ;; - WHILEGE (SVE2)
8388 ;; - WHILEGT (SVE2)
8389 ;; - WHILEHI (SVE2)
8390 ;; - WHILEHS (SVE2)
8391 ;; - WHILELE
8392 ;; - WHILELO
8393 ;; - WHILELS
8394 ;; - WHILELT
8395 ;; - WHILERW (SVE2)
8396 ;; - WHILEWR (SVE2)
8397 ;; -------------------------------------------------------------------------
8398
8399 (define_constants [
8400   (SVE_WHILE_B 0)
8401   (SVE_WHILE_B_X2 1)
8402   (SVE_WHILE_C 2)
8403 ])
8404
8405 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
8406 ;; true for all J in [0, I].
8407 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
8408   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8409         (unspec:PRED_ALL [(const_int SVE_WHILE_B)
8410                           (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8411                           (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8412                          SVE_WHILE))
8413    (clobber (reg:CC_NZC CC_REGNUM))]
8414   "TARGET_SVE"
8415   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8416 )
8417
8418 ;; The WHILE instructions set the flags in the same way as a PTEST with
8419 ;; a PTRUE GP.  Handle the case in which both results are useful.  The GP
8420 ;; operands to the PTEST aren't needed, so we allow them to be anything.
8421 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
8422   [(set (reg:CC_NZC CC_REGNUM)
8423         (unspec:CC_NZC
8424           [(match_operand 3)
8425            (match_operand 4)
8426            (const_int SVE_KNOWN_PTRUE)
8427            (unspec:PRED_ALL
8428              [(const_int SVE_WHILE_B)
8429               (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8430               (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8431              SVE_WHILE)]
8432           UNSPEC_PTEST))
8433    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8434         (unspec:PRED_ALL [(const_int SVE_WHILE_B)
8435                           (match_dup 1)
8436                           (match_dup 2)]
8437                          SVE_WHILE))]
8438   "TARGET_SVE"
8439   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8440   ;; Force the compiler to drop the unused predicate operand, so that we
8441   ;; don't have an unnecessary PTRUE.
8442   "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8443   {
8444     operands[3] = CONSTM1_RTX (VNx16BImode);
8445     operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8446   }
8447 )
8448
8449 ;; Same, but handle the case in which only the flags result is useful.
8450 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
8451   [(set (reg:CC_NZC CC_REGNUM)
8452         (unspec:CC_NZC
8453           [(match_operand 3)
8454            (match_operand 4)
8455            (const_int SVE_KNOWN_PTRUE)
8456            (unspec:PRED_ALL
8457              [(const_int SVE_WHILE_B)
8458               (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
8459               (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
8460              SVE_WHILE)]
8461           UNSPEC_PTEST))
8462    (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8463   "TARGET_SVE"
8464   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
8465   ;; Force the compiler to drop the unused predicate operand, so that we
8466   ;; don't have an unnecessary PTRUE.
8467   "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
8468   {
8469     operands[3] = CONSTM1_RTX (VNx16BImode);
8470     operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
8471   }
8472 )
8473
8474 ;; -------------------------------------------------------------------------
8475 ;; ---- [FP] Direct comparisons
8476 ;; -------------------------------------------------------------------------
8477 ;; Includes:
8478 ;; - FCMEQ
8479 ;; - FCMGE
8480 ;; - FCMGT
8481 ;; - FCMLE
8482 ;; - FCMLT
8483 ;; - FCMNE
8484 ;; - FCMUO
8485 ;; -------------------------------------------------------------------------
8486
8487 ;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
8488 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
8489 ;; with zero.
8490 (define_expand "vec_cmp<mode><vpred>"
8491   [(set (match_operand:<VPRED> 0 "register_operand")
8492         (match_operator:<VPRED> 1 "comparison_operator"
8493           [(match_operand:SVE_FULL_F 2 "register_operand")
8494            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
8495   "TARGET_SVE"
8496   {
8497     aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
8498                                       operands[2], operands[3], false);
8499     DONE;
8500   }
8501 )
8502
8503 ;; Predicated floating-point comparisons.
8504 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
8505   [(set (match_operand:<VPRED> 0 "register_operand")
8506         (unspec:<VPRED>
8507           [(match_operand:<VPRED> 1 "register_operand")
8508            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8509            (match_operand:SVE_FULL_F 3 "register_operand")
8510            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
8511           SVE_COND_FP_CMP_I0))]
8512   "TARGET_SVE"
8513   {@ [ cons: =0 , 1   , 3 , 4   ]
8514      [ Upa      , Upl , w , Dz  ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
8515      [ Upa      , Upl , w , w   ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
8516   }
8517 )
8518
8519 ;; Same for unordered comparisons.
8520 (define_insn "@aarch64_pred_fcmuo<mode>"
8521   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8522         (unspec:<VPRED>
8523           [(match_operand:<VPRED> 1 "register_operand" "Upl")
8524            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8525            (match_operand:SVE_FULL_F 3 "register_operand" "w")
8526            (match_operand:SVE_FULL_F 4 "register_operand" "w")]
8527           UNSPEC_COND_FCMUO))]
8528   "TARGET_SVE"
8529   "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
8530 )
8531
8532 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
8533 ;; with another predicate P.  This does not have the same trapping behavior
8534 ;; as predicating the comparison itself on P, but it's a legitimate fold,
8535 ;; since we can drop any potentially-trapping operations whose results
8536 ;; are not needed.
8537 ;;
8538 ;; Split the instruction into its preferred form (below) at the earliest
8539 ;; opportunity, in order to get rid of the redundant operand 1.
8540 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
8541   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8542         (and:<VPRED>
8543           (unspec:<VPRED>
8544             [(match_operand:<VPRED> 1)
8545              (const_int SVE_KNOWN_PTRUE)
8546              (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
8547              (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
8548             SVE_COND_FP_CMP_I0)
8549           (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
8550   "TARGET_SVE"
8551   "#"
8552   "&& 1"
8553   [(set (match_dup 0)
8554         (unspec:<VPRED>
8555           [(match_dup 4)
8556            (const_int SVE_MAYBE_NOT_PTRUE)
8557            (match_dup 2)
8558            (match_dup 3)]
8559           SVE_COND_FP_CMP_I0))]
8560 )
8561
8562 ;; Same for unordered comparisons.
8563 (define_insn_and_split "*fcmuo<mode>_and_combine"
8564   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8565         (and:<VPRED>
8566           (unspec:<VPRED>
8567             [(match_operand:<VPRED> 1)
8568              (const_int SVE_KNOWN_PTRUE)
8569              (match_operand:SVE_FULL_F 2 "register_operand" "w")
8570              (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8571             UNSPEC_COND_FCMUO)
8572           (match_operand:<VPRED> 4 "register_operand" "Upl")))]
8573   "TARGET_SVE"
8574   "#"
8575   "&& 1"
8576   [(set (match_dup 0)
8577         (unspec:<VPRED>
8578           [(match_dup 4)
8579            (const_int SVE_MAYBE_NOT_PTRUE)
8580            (match_dup 2)
8581            (match_dup 3)]
8582           UNSPEC_COND_FCMUO))]
8583 )
8584
8585 ;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
8586 ;; In this case, we still need a separate NOT/BIC operation, but predicating
8587 ;; the comparison on the BIC operand removes the need for a PTRUE.
8588 (define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
8589   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8590         (and:<VPRED>
8591           (and:<VPRED>
8592             (not:<VPRED>
8593               (unspec:<VPRED>
8594                 [(match_operand:<VPRED> 1)
8595                  (const_int SVE_KNOWN_PTRUE)
8596                  (match_operand:SVE_FULL_F 2 "register_operand" "w")
8597                  (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8598                 SVE_COND_FP_CMP_I0))
8599             (match_operand:<VPRED> 4 "register_operand" "Upa"))
8600           (match_dup:<VPRED> 1)))
8601    (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8602   "TARGET_SVE"
8603   "#"
8604   "&& 1"
8605   [(set (match_dup 5)
8606         (unspec:<VPRED>
8607           [(match_dup 4)
8608            (const_int SVE_MAYBE_NOT_PTRUE)
8609            (match_dup 2)
8610            (match_dup 3)]
8611           SVE_COND_FP_CMP_I0))
8612    (set (match_dup 0)
8613         (and:<VPRED>
8614           (not:<VPRED>
8615             (match_dup 5))
8616           (match_dup 4)))]
8617 {
8618   if (can_create_pseudo_p ())
8619     operands[5] = gen_reg_rtx (<VPRED>mode);
8620 }
8621 )
8622
8623 ;; Make sure that we expand to a nor when the operand 4 of
8624 ;; *fcm<cmp_op><mode>_bic_combine is a not.
8625 (define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
8626   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8627         (and:<VPRED>
8628           (and:<VPRED>
8629             (not:<VPRED>
8630               (unspec:<VPRED>
8631                 [(match_operand:<VPRED> 1)
8632                  (const_int SVE_KNOWN_PTRUE)
8633                  (match_operand:SVE_FULL_F 2 "register_operand" "w")
8634                  (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
8635                 SVE_COND_FP_CMP_I0))
8636             (not:<VPRED>
8637               (match_operand:<VPRED> 4 "register_operand" "Upa")))
8638           (match_dup:<VPRED> 1)))
8639    (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8640   "TARGET_SVE"
8641   "#"
8642   "&& 1"
8643   [(set (match_dup 5)
8644         (unspec:<VPRED>
8645           [(match_dup 1)
8646            (const_int SVE_KNOWN_PTRUE)
8647            (match_dup 2)
8648            (match_dup 3)]
8649           SVE_COND_FP_CMP_I0))
8650    (set (match_dup 0)
8651         (and:<VPRED>
8652           (and:<VPRED>
8653             (not:<VPRED>
8654               (match_dup 5))
8655             (not:<VPRED>
8656               (match_dup 4)))
8657           (match_dup 1)))]
8658 {
8659   if (can_create_pseudo_p ())
8660     operands[5] = gen_reg_rtx (<VPRED>mode);
8661 }
8662 )
8663
8664 (define_insn_and_split "*fcmuo<mode>_bic_combine"
8665   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8666         (and:<VPRED>
8667           (and:<VPRED>
8668             (not:<VPRED>
8669               (unspec:<VPRED>
8670                 [(match_operand:<VPRED> 1)
8671                  (const_int SVE_KNOWN_PTRUE)
8672                  (match_operand:SVE_FULL_F 2 "register_operand" "w")
8673                  (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8674                 UNSPEC_COND_FCMUO))
8675             (match_operand:<VPRED> 4 "register_operand" "Upa"))
8676           (match_dup:<VPRED> 1)))
8677    (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8678   "TARGET_SVE"
8679   "#"
8680   "&& 1"
8681   [(set (match_dup 5)
8682         (unspec:<VPRED>
8683           [(match_dup 4)
8684            (const_int SVE_MAYBE_NOT_PTRUE)
8685            (match_dup 2)
8686            (match_dup 3)]
8687           UNSPEC_COND_FCMUO))
8688    (set (match_dup 0)
8689         (and:<VPRED>
8690           (not:<VPRED>
8691             (match_dup 5))
8692           (match_dup 4)))]
8693 {
8694   if (can_create_pseudo_p ())
8695     operands[5] = gen_reg_rtx (<VPRED>mode);
8696 }
8697 )
8698
8699 ;; Same for unordered comparisons.
8700 (define_insn_and_split "*fcmuo<mode>_nor_combine"
8701   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8702         (and:<VPRED>
8703           (and:<VPRED>
8704             (not:<VPRED>
8705               (unspec:<VPRED>
8706                 [(match_operand:<VPRED> 1)
8707                  (const_int SVE_KNOWN_PTRUE)
8708                  (match_operand:SVE_FULL_F 2 "register_operand" "w")
8709                  (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8710                 UNSPEC_COND_FCMUO))
8711             (not:<VPRED>
8712               (match_operand:<VPRED> 4 "register_operand" "Upa")))
8713           (match_dup:<VPRED> 1)))
8714    (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8715   "TARGET_SVE"
8716   "#"
8717   "&& 1"
8718   [(set (match_dup 5)
8719         (unspec:<VPRED>
8720           [(match_dup 1)
8721            (const_int SVE_KNOWN_PTRUE)
8722            (match_dup 2)
8723            (match_dup 3)]
8724           UNSPEC_COND_FCMUO))
8725    (set (match_dup 0)
8726         (and:<VPRED>
8727           (and:<VPRED>
8728             (not:<VPRED>
8729               (match_dup 5))
8730             (not:<VPRED>
8731               (match_dup 4)))
8732           (match_dup 1)))]
8733 {
8734   if (can_create_pseudo_p ())
8735     operands[5] = gen_reg_rtx (<VPRED>mode);
8736 }
8737 )
8738
8739 ;; -------------------------------------------------------------------------
8740 ;; ---- [FP] Absolute comparisons
8741 ;; -------------------------------------------------------------------------
8742 ;; Includes:
8743 ;; - FACGE
8744 ;; - FACGT
8745 ;; - FACLE
8746 ;; - FACLT
8747 ;; -------------------------------------------------------------------------
8748
8749 ;; Predicated floating-point absolute comparisons.
8750 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
8751   [(set (match_operand:<VPRED> 0 "register_operand")
8752         (unspec:<VPRED>
8753           [(match_operand:<VPRED> 1 "register_operand")
8754            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8755            (unspec:SVE_FULL_F
8756              [(match_dup 1)
8757               (match_dup 2)
8758               (match_operand:SVE_FULL_F 3 "register_operand")]
8759              UNSPEC_COND_FABS)
8760            (unspec:SVE_FULL_F
8761              [(match_dup 1)
8762               (match_dup 2)
8763               (match_operand:SVE_FULL_F 4 "register_operand")]
8764              UNSPEC_COND_FABS)]
8765           SVE_COND_FP_ABS_CMP))]
8766   "TARGET_SVE"
8767 )
8768
8769 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
8770   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8771         (unspec:<VPRED>
8772           [(match_operand:<VPRED> 1 "register_operand" "Upl")
8773            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8774            (unspec:SVE_FULL_F
8775              [(match_operand 5)
8776               (const_int SVE_RELAXED_GP)
8777               (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8778              UNSPEC_COND_FABS)
8779            (unspec:SVE_FULL_F
8780              [(match_operand 6)
8781               (const_int SVE_RELAXED_GP)
8782               (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8783              UNSPEC_COND_FABS)]
8784           SVE_COND_FP_ABS_CMP))]
8785   "TARGET_SVE"
8786   "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8787   "&& (!rtx_equal_p (operands[1], operands[5])
8788        || !rtx_equal_p (operands[1], operands[6]))"
8789   {
8790     operands[5] = copy_rtx (operands[1]);
8791     operands[6] = copy_rtx (operands[1]);
8792   }
8793 )
8794
8795 (define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
8796   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8797         (unspec:<VPRED>
8798           [(match_operand:<VPRED> 1 "register_operand" "Upl")
8799            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8800            (unspec:SVE_FULL_F
8801              [(match_dup 1)
8802               (match_operand:SI 5 "aarch64_sve_gp_strictness")
8803               (match_operand:SVE_FULL_F 2 "register_operand" "w")]
8804              UNSPEC_COND_FABS)
8805            (unspec:SVE_FULL_F
8806              [(match_dup 1)
8807               (match_operand:SI 6 "aarch64_sve_gp_strictness")
8808               (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8809              UNSPEC_COND_FABS)]
8810           SVE_COND_FP_ABS_CMP))]
8811   "TARGET_SVE"
8812   "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
8813 )
8814
8815 ;; -------------------------------------------------------------------------
8816 ;; ---- [PRED] Select
8817 ;; -------------------------------------------------------------------------
8818 ;; Includes:
8819 ;; - SEL
8820 ;; -------------------------------------------------------------------------
8821
8822 (define_insn "@vcond_mask_<mode><mode>"
8823   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8824         (ior:PRED_ALL
8825           (and:PRED_ALL
8826             (match_operand:PRED_ALL 3 "register_operand" "Upa")
8827             (match_operand:PRED_ALL 1 "register_operand" "Upa"))
8828           (and:PRED_ALL
8829             (not (match_dup 3))
8830             (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
8831   "TARGET_SVE"
8832   "sel\t%0.b, %3, %1.b, %2.b"
8833 )
8834
8835 ;; -------------------------------------------------------------------------
8836 ;; ---- [PRED] Test bits
8837 ;; -------------------------------------------------------------------------
8838 ;; Includes:
8839 ;; - PTEST
8840 ;; -------------------------------------------------------------------------
8841
8842 ;; Branch based on predicate equality or inequality.
8843 (define_expand "cbranch<mode>4"
8844   [(set (pc)
8845         (if_then_else
8846           (match_operator 0 "aarch64_equality_operator"
8847             [(match_operand:PRED_ALL 1 "register_operand")
8848              (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
8849           (label_ref (match_operand 3 ""))
8850           (pc)))]
8851   ""
8852   {
8853     rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
8854     rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
8855     rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
8856     rtx pred;
8857     if (operands[2] == CONST0_RTX (<MODE>mode))
8858       pred = operands[1];
8859     else
8860       {
8861         pred = gen_reg_rtx (<MODE>mode);
8862         emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
8863                                                  operands[2]));
8864       }
8865     emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
8866     operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
8867     operands[2] = const0_rtx;
8868   }
8869 )
8870
8871 ;; See "Description of UNSPEC_PTEST" above for details.
8872 (define_insn "aarch64_ptest<mode>"
8873   [(set (reg:CC_NZC CC_REGNUM)
8874         (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
8875                         (match_operand 1)
8876                         (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8877                         (match_operand:PRED_ALL 3 "register_operand" "Upa")]
8878                        UNSPEC_PTEST))]
8879   "TARGET_SVE"
8880   "ptest\t%0, %3.b"
8881 )
8882
8883 ;; =========================================================================
8884 ;; == Reductions
8885 ;; =========================================================================
8886
8887 ;; -------------------------------------------------------------------------
8888 ;; ---- [INT,FP] Conditional reductions
8889 ;; -------------------------------------------------------------------------
8890 ;; Includes:
8891 ;; - CLASTA
8892 ;; - CLASTB
8893 ;; -------------------------------------------------------------------------
8894
8895 ;; Set operand 0 to the last active element in operand 3, or to tied
8896 ;; operand 1 if no elements are active.
8897 (define_insn "@fold_extract_<last_op>_<mode>"
8898   [(set (match_operand:<VEL> 0 "register_operand")
8899         (unspec:<VEL>
8900           [(match_operand:<VEL> 1 "register_operand")
8901            (match_operand:<VPRED> 2 "register_operand")
8902            (match_operand:SVE_FULL 3 "register_operand")]
8903           CLAST))]
8904   "TARGET_SVE"
8905   {@ [ cons: =0 , 1 , 2   , 3  ]
8906      [ ?r       , 0 , Upl , w  ] clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
8907      [ w        , 0 , Upl , w  ] clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>
8908   }
8909 )
8910
8911 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
8912   [(set (match_operand:SVE_FULL 0 "register_operand")
8913         (unspec:SVE_FULL
8914           [(match_operand:SVE_FULL 1 "register_operand")
8915            (match_operand:<VPRED> 2 "register_operand")
8916            (match_operand:SVE_FULL 3 "register_operand")]
8917           CLAST))]
8918   "TARGET_SVE"
8919   {@ [ cons: =0 , 1 , 2   , 3  ]
8920      [ w        , 0 , Upl , w  ] clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8921      [ ?&w      , w , Upl , w  ] movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
8922   }
8923 )
8924
8925 ;; -------------------------------------------------------------------------
8926 ;; ---- [INT] Tree reductions
8927 ;; -------------------------------------------------------------------------
8928 ;; Includes:
8929 ;; - ANDV
8930 ;; - EORV
8931 ;; - ORV
8932 ;; - SADDV
8933 ;; - SMAXV
8934 ;; - SMINV
8935 ;; - UADDV
8936 ;; - UMAXV
8937 ;; - UMINV
8938 ;; -------------------------------------------------------------------------
8939
8940 ;; Unpredicated integer add reduction.
8941 (define_expand "reduc_plus_scal_<mode>"
8942   [(match_operand:<VEL> 0 "register_operand")
8943    (match_operand:SVE_FULL_I 1 "register_operand")]
8944   "TARGET_SVE"
8945   {
8946     rtx pred = aarch64_ptrue_reg (<VPRED>mode);
8947     rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
8948     emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
8949     if (tmp != operands[0])
8950       emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
8951     DONE;
8952   }
8953 )
8954
8955 ;; Predicated integer add reduction.  The result is always 64-bits.
8956 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8957   [(set (match_operand:DI 0 "register_operand" "=w")
8958         (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
8959                     (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8960                    SVE_INT_ADDV))]
8961   "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8962   "<su>addv\t%d0, %1, %2.<Vetype>"
8963 )
8964
8965 ;; Unpredicated integer reductions.
8966 (define_expand "reduc_<optab>_scal_<mode>"
8967   [(set (match_operand:<VEL> 0 "register_operand")
8968         (unspec:<VEL> [(match_dup 2)
8969                        (match_operand:SVE_FULL_I 1 "register_operand")]
8970                       SVE_INT_REDUCTION))]
8971   "TARGET_SVE"
8972   {
8973     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8974   }
8975 )
8976
8977 ;; Predicated integer reductions.
8978 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
8979   [(set (match_operand:<VEL> 0 "register_operand" "=w")
8980         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
8981                        (match_operand:SVE_FULL_I 2 "register_operand" "w")]
8982                       SVE_INT_REDUCTION))]
8983   "TARGET_SVE"
8984   "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
8985 )
8986
8987 ;; -------------------------------------------------------------------------
8988 ;; ---- [FP] Tree reductions
8989 ;; -------------------------------------------------------------------------
8990 ;; Includes:
8991 ;; - FADDV
8992 ;; - FMAXNMV
8993 ;; - FMAXV
8994 ;; - FMINNMV
8995 ;; - FMINV
8996 ;; -------------------------------------------------------------------------
8997
8998 ;; Unpredicated floating-point tree reductions.
8999 (define_expand "reduc_<optab>_scal_<mode>"
9000   [(set (match_operand:<VEL> 0 "register_operand")
9001         (unspec:<VEL> [(match_dup 2)
9002                        (match_operand:SVE_FULL_F 1 "register_operand")]
9003                       SVE_FP_REDUCTION))]
9004   "TARGET_SVE"
9005   {
9006     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
9007   }
9008 )
9009
9010 (define_expand "reduc_<fmaxmin>_scal_<mode>"
9011   [(match_operand:<VEL> 0 "register_operand")
9012    (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
9013                  FMAXMINNMV)]
9014   "TARGET_SVE"
9015   {
9016     emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
9017     DONE;
9018   }
9019 )
9020
9021 ;; Predicated floating-point tree reductions.
9022 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
9023   [(set (match_operand:<VEL> 0 "register_operand" "=w")
9024         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
9025                        (match_operand:SVE_FULL_F 2 "register_operand" "w")]
9026                       SVE_FP_REDUCTION))]
9027   "TARGET_SVE"
9028   "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
9029 )
9030
9031 ;; -------------------------------------------------------------------------
9032 ;; ---- [FP] Left-to-right reductions
9033 ;; -------------------------------------------------------------------------
9034 ;; Includes:
9035 ;; - FADDA
9036 ;; -------------------------------------------------------------------------
9037
9038 ;; Unpredicated in-order FP reductions.
9039 (define_expand "fold_left_plus_<mode>"
9040   [(set (match_operand:<VEL> 0 "register_operand")
9041         (unspec:<VEL> [(match_dup 3)
9042                        (match_operand:<VEL> 1 "register_operand")
9043                        (match_operand:SVE_FULL_F 2 "register_operand")]
9044                       UNSPEC_FADDA))]
9045   "TARGET_SVE && TARGET_NON_STREAMING"
9046   {
9047     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
9048   }
9049 )
9050
9051 ;; Predicated in-order FP reductions.
9052 (define_insn "mask_fold_left_plus_<mode>"
9053   [(set (match_operand:<VEL> 0 "register_operand" "=w")
9054         (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
9055                        (match_operand:<VEL> 1 "register_operand" "0")
9056                        (match_operand:SVE_FULL_F 2 "register_operand" "w")]
9057                       UNSPEC_FADDA))]
9058   "TARGET_SVE && TARGET_NON_STREAMING"
9059   "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
9060 )
9061
9062 ;; =========================================================================
9063 ;; == Permutes
9064 ;; =========================================================================
9065
9066 ;; -------------------------------------------------------------------------
9067 ;; ---- [INT,FP] General permutes
9068 ;; -------------------------------------------------------------------------
9069 ;; Includes:
9070 ;; - TBL
9071 ;; - TBLQ (SVE2p1)
9072 ;; -------------------------------------------------------------------------
9073
9074 (define_expand "vec_perm<mode>"
9075   [(match_operand:SVE_FULL 0 "register_operand")
9076    (match_operand:SVE_FULL 1 "register_operand")
9077    (match_operand:SVE_FULL 2 "register_operand")
9078    (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
9079   "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
9080   {
9081     aarch64_expand_sve_vec_perm (operands[0], operands[1],
9082                                  operands[2], operands[3]);
9083     DONE;
9084   }
9085 )
9086
9087 (define_insn "@aarch64_sve_<perm_insn><mode>"
9088   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9089         (unspec:SVE_FULL
9090           [(match_operand:SVE_FULL 1 "register_operand" "w")
9091            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
9092           SVE_TBL))]
9093   "TARGET_SVE"
9094   "<perm_insn>\t%0.<Vetype>, {%1.<Vetype>}, %2.<Vetype>"
9095 )
9096
9097 ;; -------------------------------------------------------------------------
9098 ;; ---- [INT,FP] Special-purpose unary permutes
9099 ;; -------------------------------------------------------------------------
9100 ;; Includes:
9101 ;; - COMPACT
9102 ;; - DUP
9103 ;; - REV
9104 ;; -------------------------------------------------------------------------
9105
9106 ;; Compact active elements and pad with zeros.
9107 (define_insn "@aarch64_sve_compact<mode>"
9108   [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
9109         (unspec:SVE_FULL_SD
9110           [(match_operand:<VPRED> 1 "register_operand" "Upl")
9111            (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
9112           UNSPEC_SVE_COMPACT))]
9113   "TARGET_SVE && TARGET_NON_STREAMING"
9114   "compact\t%0.<Vetype>, %1, %2.<Vetype>"
9115 )
9116
9117 ;; Duplicate one element of a vector.
9118 (define_insn "@aarch64_sve_dup_lane<mode>"
9119   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9120         (vec_duplicate:SVE_ALL
9121           (vec_select:<VEL>
9122             (match_operand:SVE_ALL 1 "register_operand" "w")
9123             (parallel [(match_operand:SI 2 "const_int_operand")]))))]
9124   "TARGET_SVE
9125    && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)"
9126   "dup\t%0.<Vctype>, %1.<Vctype>[%2]"
9127 )
9128
9129 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
9130 ;;
9131 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
9132 ;; number op2 + N of op1.  (We don't need to distinguish between memory
9133 ;; and architectural register lane numbering for op1 or op0, since the
9134 ;; two numbering schemes are the same for SVE.)
9135 ;;
9136 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
9137 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
9138 ;; of op0.  We therefore get the correct result for both endiannesses.
9139 ;;
9140 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
9141 ;; is in the opposite order to architectural register lane numbering.
9142 ;; Thus if we were to do this operation via a V128 temporary register,
9143 ;; the vec_select and vec_duplicate would both involve a reverse operation
9144 ;; for big-endian targets.  In this fused pattern the two reverses cancel
9145 ;; each other out.
9146 (define_insn "@aarch64_sve_dupq_lane<mode>"
9147   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9148         (vec_duplicate:SVE_FULL
9149           (vec_select:<V128>
9150             (match_operand:SVE_FULL 1 "register_operand" "w")
9151             (match_operand 2 "ascending_int_parallel"))))]
9152   "TARGET_SVE
9153    && (INTVAL (XVECEXP (operands[2], 0, 0))
9154        * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
9155    && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
9156                 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
9157   {
9158     unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
9159                          * GET_MODE_SIZE (<VEL>mode));
9160     operands[2] = gen_int_mode (byte / 16, DImode);
9161     return "dup\t%0.q, %1.q[%2]";
9162   }
9163 )
9164
9165 ;; Reverse the order of elements within a full vector.
9166 (define_insn "@aarch64_sve_rev<mode>"
9167   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9168         (unspec:SVE_ALL
9169           [(match_operand:SVE_ALL 1 "register_operand" "w")]
9170           UNSPEC_REV))]
9171   "TARGET_SVE"
9172   "rev\t%0.<Vctype>, %1.<Vctype>")
9173
9174 ;; -------------------------------------------------------------------------
9175 ;; ---- [INT,FP] Special-purpose binary permutes
9176 ;; -------------------------------------------------------------------------
9177 ;; Includes:
9178 ;; - EXT
9179 ;; - SPLICE
9180 ;; - TRN1
9181 ;; - TRN2
9182 ;; - UZP1
9183 ;; - UZPQ1 (SVE2p1)
9184 ;; - UZP2
9185 ;; - UZPQ2 (SVE2p1)
9186 ;; - ZIP1
9187 ;; - ZIPQ1 (SVE2p1)
9188 ;; - ZIP2
9189 ;; - ZIPQ2 (SVE2p1)
9190 ;; -------------------------------------------------------------------------
9191
9192 ;; Like EXT, but start at the first active element.
9193 (define_insn "@aarch64_sve_splice<mode>"
9194   [(set (match_operand:SVE_FULL 0 "register_operand")
9195         (unspec:SVE_FULL
9196           [(match_operand:<VPRED> 1 "register_operand")
9197            (match_operand:SVE_FULL 2 "register_operand")
9198            (match_operand:SVE_FULL 3 "register_operand")]
9199           UNSPEC_SVE_SPLICE))]
9200   "TARGET_SVE"
9201   {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
9202      [ w        , Upl , 0 , w ; *              ] splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
9203      [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
9204   }
9205 )
9206
9207 ;; Permutes that take half the elements from one vector and half the
9208 ;; elements from the other.
9209 (define_insn "@aarch64_sve_<perm_insn><mode>"
9210   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
9211         (unspec:SVE_ALL
9212           [(match_operand:SVE_ALL 1 "register_operand" "w")
9213            (match_operand:SVE_ALL 2 "register_operand" "w")]
9214           SVE_PERMUTE))]
9215   "TARGET_SVE"
9216   "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
9217 )
9218
9219 ;; Apply PERMUTE to 128-bit sequences.  The behavior of these patterns
9220 ;; doesn't depend on the mode.
9221 (define_insn "@aarch64_sve_<optab><mode>"
9222   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
9223         (unspec:SVE_FULL
9224           [(match_operand:SVE_FULL 1 "register_operand" "w")
9225            (match_operand:SVE_FULL 2 "register_operand" "w")]
9226           PERMUTEQ))]
9227   "TARGET_SVE_F64MM"
9228   "<perm_insn>\t%0.q, %1.q, %2.q"
9229 )
9230
9231 ;; Concatenate two vectors and extract a subvector.  Note that the
9232 ;; immediate (third) operand is the lane index not the byte index.
9233 (define_insn "@aarch64_sve_ext<mode>"
9234   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
9235         (unspec:SVE_ALL
9236           [(match_operand:SVE_ALL 1 "register_operand" "0, w")
9237            (match_operand:SVE_ALL 2 "register_operand" "w, w")
9238            (match_operand:SI 3 "const_int_operand")]
9239           UNSPEC_EXT))]
9240   "TARGET_SVE
9241    && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)"
9242   {
9243     operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8);
9244     return (which_alternative == 0
9245             ? "ext\\t%0.b, %0.b, %2.b, #%3"
9246             : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
9247   }
9248   [(set_attr "movprfx" "*,yes")]
9249 )
9250
9251 ;; -------------------------------------------------------------------------
9252 ;; ---- [PRED] Special-purpose unary permutes
9253 ;; -------------------------------------------------------------------------
9254 ;; Includes:
9255 ;; - REV
9256 ;; -------------------------------------------------------------------------
9257
9258 (define_insn "@aarch64_sve_rev<mode>"
9259   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9260         (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
9261                          UNSPEC_REV))]
9262   "TARGET_SVE"
9263   "rev\t%0.<Vetype>, %1.<Vetype>")
9264
9265 ;; -------------------------------------------------------------------------
9266 ;; ---- [PRED] Special-purpose binary permutes
9267 ;; -------------------------------------------------------------------------
9268 ;; Includes:
9269 ;; - TRN1
9270 ;; - TRN2
9271 ;; - UZP1
9272 ;; - UZP2
9273 ;; - ZIP1
9274 ;; - ZIP2
9275 ;; -------------------------------------------------------------------------
9276
9277 ;; Permutes that take half the elements from one vector and half the
9278 ;; elements from the other.
9279 (define_insn "@aarch64_sve_<perm_insn><mode>"
9280   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
9281         (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
9282                           (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9283                          PERMUTE))]
9284   "TARGET_SVE"
9285   "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9286 )
9287
9288 ;; Special purpose permute used by the predicate generation instructions.
9289 ;; Unlike the normal permute patterns, these instructions operate on VNx16BI
9290 ;; regardless of the element size, so that all input and output bits are
9291 ;; well-defined.  Operand 3 then indicates the size of the permute.
9292 (define_insn "@aarch64_sve_trn1_conv<mode>"
9293   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
9294         (unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
9295                          (match_operand:VNx16BI 2 "register_operand" "Upa")
9296                          (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
9297                         UNSPEC_TRN1_CONV))]
9298   "TARGET_SVE"
9299   "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
9300 )
9301
9302 ;; =========================================================================
9303 ;; == Conversions
9304 ;; =========================================================================
9305
9306 ;; -------------------------------------------------------------------------
9307 ;; ---- [INT<-INT] Packs
9308 ;; -------------------------------------------------------------------------
9309 ;; Includes:
9310 ;; - UZP1
9311 ;; -------------------------------------------------------------------------
9312
9313 ;; Integer pack.  Use UZP1 on the narrower type, which discards
9314 ;; the high part of each wide element.
9315 (define_insn "vec_pack_trunc_<Vwide>"
9316   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
9317         (unspec:SVE_FULL_BHSI
9318           [(match_operand:<VWIDE> 1 "register_operand" "w")
9319            (match_operand:<VWIDE> 2 "register_operand" "w")]
9320           UNSPEC_PACK))]
9321   "TARGET_SVE"
9322   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
9323 )
9324
9325 ;; Integer partial pack packing two partial SVE types into a single full SVE
9326 ;; type of the same element type.  Use UZP1 on the wider type, which discards
9327 ;; the high part of each wide element.  This allows to concat SVE partial types
9328 ;; into a wider vector.
9329 (define_insn "@aarch64_pack_partial<mode>"
9330   [(set (match_operand:SVE_NO2E 0 "register_operand" "=w")
9331         (vec_concat:SVE_NO2E
9332           (match_operand:<VHALF> 1 "register_operand" "w")
9333           (match_operand:<VHALF> 2 "register_operand" "w")))]
9334   "TARGET_SVE"
9335   "uzp1\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
9336 )
9337
9338 ;; -------------------------------------------------------------------------
9339 ;; ---- [INT<-INT] Unpacks
9340 ;; -------------------------------------------------------------------------
9341 ;; Includes:
9342 ;; - SUNPKHI
9343 ;; - SUNPKLO
9344 ;; - UUNPKHI
9345 ;; - UUNPKLO
9346 ;; -------------------------------------------------------------------------
9347
9348 ;; Unpack the low or high half of a vector, where "high" refers to
9349 ;; the low-numbered lanes for big-endian and the high-numbered lanes
9350 ;; for little-endian.
9351 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
9352   [(match_operand:<VWIDE> 0 "register_operand")
9353    (unspec:<VWIDE>
9354      [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
9355   "TARGET_SVE"
9356   {
9357     emit_insn ((<hi_lanes_optab>
9358                 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
9359                 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
9360                (operands[0], operands[1]));
9361     DONE;
9362   }
9363 )
9364
9365 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
9366   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
9367         (unspec:<VWIDE>
9368           [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
9369           UNPACK))]
9370   "TARGET_SVE"
9371   "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
9372 )
9373
9374 ;; -------------------------------------------------------------------------
9375 ;; ---- [INT<-FP] Conversions
9376 ;; -------------------------------------------------------------------------
9377 ;; Includes:
9378 ;; - FCVTZS
9379 ;; - FCVTZU
9380 ;; -------------------------------------------------------------------------
9381
9382 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
9383 ;; SF to SI or DF to DI).
9384 (define_expand "<optab><mode><v_int_equiv>2"
9385   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
9386         (unspec:<V_INT_EQUIV>
9387           [(match_dup 2)
9388            (const_int SVE_RELAXED_GP)
9389            (match_operand:SVE_FULL_F 1 "register_operand")]
9390           SVE_COND_FCVTI))]
9391   "TARGET_SVE"
9392   {
9393     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
9394   }
9395 )
9396
9397 ;; Predicated float-to-integer conversion, either to the same width or wider.
9398 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
9399   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9400         (unspec:SVE_FULL_HSDI
9401           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9402            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9403            (match_operand:SVE_FULL_F 2 "register_operand")]
9404           SVE_COND_FCVTI))]
9405   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9406   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
9407      [ w        , Upl , 0 ; *              ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9408      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9409   }
9410 )
9411
9412 ;; Predicated narrowing float-to-integer conversion.
9413 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9414   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9415         (unspec:VNx4SI_ONLY
9416           [(match_operand:VNx2BI 1 "register_operand")
9417            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9418            (match_operand:VNx2DF_ONLY 2 "register_operand")]
9419           SVE_COND_FCVTI))]
9420   "TARGET_SVE"
9421   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
9422      [ w        , Upl , 0 ; *              ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9423      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9424   }
9425 )
9426
9427 ;; Predicated float-to-integer conversion with merging, either to the same
9428 ;; width or wider.
9429 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
9430   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9431         (unspec:SVE_FULL_HSDI
9432           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9433            (unspec:SVE_FULL_HSDI
9434              [(match_dup 1)
9435               (const_int SVE_STRICT_GP)
9436               (match_operand:SVE_FULL_F 2 "register_operand")]
9437              SVE_COND_FCVTI)
9438            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9439           UNSPEC_SEL))]
9440   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9441 )
9442
9443 ;; The first alternative doesn't need the earlyclobber, but the only case
9444 ;; it would help is the uninteresting one in which operands 2 and 3 are
9445 ;; the same register (despite having different modes).  Making all the
9446 ;; alternatives earlyclobber makes things more consistent for the
9447 ;; register allocator.
9448 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
9449   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9450         (unspec:SVE_FULL_HSDI
9451           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9452            (unspec:SVE_FULL_HSDI
9453              [(match_operand 4)
9454               (const_int SVE_RELAXED_GP)
9455               (match_operand:SVE_FULL_F 2 "register_operand")]
9456              SVE_COND_FCVTI)
9457            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9458           UNSPEC_SEL))]
9459   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9460   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9461      [ &w       , Upl , w , 0  ; *              ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9462      [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9463      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9464   }
9465   "&& !rtx_equal_p (operands[1], operands[4])"
9466   {
9467     operands[4] = copy_rtx (operands[1]);
9468   }
9469 )
9470
9471 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
9472   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
9473         (unspec:SVE_FULL_HSDI
9474           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9475            (unspec:SVE_FULL_HSDI
9476              [(match_dup 1)
9477               (const_int SVE_STRICT_GP)
9478               (match_operand:SVE_FULL_F 2 "register_operand")]
9479              SVE_COND_FCVTI)
9480            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
9481           UNSPEC_SEL))]
9482   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9483   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9484      [ &w       , Upl , w , 0  ; *              ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9485      [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9486      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
9487   }
9488 )
9489
9490 ;; Predicated narrowing float-to-integer conversion with merging.
9491 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9492   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9493         (unspec:VNx4SI_ONLY
9494           [(match_operand:VNx2BI 1 "register_operand")
9495            (unspec:VNx4SI_ONLY
9496              [(match_dup 1)
9497               (const_int SVE_STRICT_GP)
9498               (match_operand:VNx2DF_ONLY 2 "register_operand")]
9499              SVE_COND_FCVTI)
9500            (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9501           UNSPEC_SEL))]
9502   "TARGET_SVE"
9503 )
9504
9505 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
9506   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
9507         (unspec:VNx4SI_ONLY
9508           [(match_operand:VNx2BI 1 "register_operand")
9509            (unspec:VNx4SI_ONLY
9510              [(match_dup 1)
9511               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9512               (match_operand:VNx2DF_ONLY 2 "register_operand")]
9513              SVE_COND_FCVTI)
9514            (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
9515           UNSPEC_SEL))]
9516   "TARGET_SVE"
9517   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9518      [ &w       , Upl , w , 0  ; *              ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9519      [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9520      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
9521   }
9522 )
9523
9524 ;; -------------------------------------------------------------------------
9525 ;; ---- [INT<-FP] Packs
9526 ;; -------------------------------------------------------------------------
9527 ;; The patterns in this section are synthetic.
9528 ;; -------------------------------------------------------------------------
9529
9530 ;; Convert two vectors of DF to SI and pack the results into a single vector.
9531 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
9532   [(set (match_dup 4)
9533         (unspec:VNx4SI
9534           [(match_dup 3)
9535            (const_int SVE_RELAXED_GP)
9536            (match_operand:VNx2DF 1 "register_operand")]
9537           SVE_COND_FCVTI))
9538    (set (match_dup 5)
9539         (unspec:VNx4SI
9540           [(match_dup 3)
9541            (const_int SVE_RELAXED_GP)
9542            (match_operand:VNx2DF 2 "register_operand")]
9543           SVE_COND_FCVTI))
9544    (set (match_operand:VNx4SI 0 "register_operand")
9545         (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9546   "TARGET_SVE"
9547   {
9548     operands[3] = aarch64_ptrue_reg (VNx2BImode);
9549     operands[4] = gen_reg_rtx (VNx4SImode);
9550     operands[5] = gen_reg_rtx (VNx4SImode);
9551   }
9552 )
9553
9554 ;; -------------------------------------------------------------------------
9555 ;; ---- [INT<-FP] Unpacks
9556 ;; -------------------------------------------------------------------------
9557 ;; No patterns here yet!
9558 ;; -------------------------------------------------------------------------
9559
9560 ;; -------------------------------------------------------------------------
9561 ;; ---- [FP<-INT] Conversions
9562 ;; -------------------------------------------------------------------------
9563 ;; Includes:
9564 ;; - SCVTF
9565 ;; - UCVTF
9566 ;; -------------------------------------------------------------------------
9567
9568 ;; Unpredicated conversion of integers to floats of the same size
9569 ;; (HI to HF, SI to SF or DI to DF).
9570 (define_expand "<optab><v_int_equiv><mode>2"
9571   [(set (match_operand:SVE_FULL_F 0 "register_operand")
9572         (unspec:SVE_FULL_F
9573           [(match_dup 2)
9574            (const_int SVE_RELAXED_GP)
9575            (match_operand:<V_INT_EQUIV> 1 "register_operand")]
9576           SVE_COND_ICVTF))]
9577   "TARGET_SVE"
9578   {
9579     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
9580   }
9581 )
9582
9583 ;; Predicated integer-to-float conversion, either to the same width or
9584 ;; narrower.
9585 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9586   [(set (match_operand:SVE_FULL_F 0 "register_operand")
9587         (unspec:SVE_FULL_F
9588           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9589            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9590            (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9591           SVE_COND_ICVTF))]
9592   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9593   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
9594      [ w        , Upl , 0 ; *              ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9595      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9596   }
9597 )
9598
9599 ;; Predicated widening integer-to-float conversion.
9600 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9601   [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9602         (unspec:VNx2DF_ONLY
9603           [(match_operand:VNx2BI 1 "register_operand")
9604            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9605            (match_operand:VNx4SI_ONLY 2 "register_operand")]
9606           SVE_COND_ICVTF))]
9607   "TARGET_SVE"
9608   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
9609      [ w        , Upl , 0 ; *              ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9610      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9611   }
9612 )
9613
9614 ;; Predicated integer-to-float conversion with merging, either to the same
9615 ;; width or narrower.
9616 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
9617   [(set (match_operand:SVE_FULL_F 0 "register_operand")
9618         (unspec:SVE_FULL_F
9619           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9620            (unspec:SVE_FULL_F
9621              [(match_dup 1)
9622               (const_int SVE_STRICT_GP)
9623               (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9624              SVE_COND_ICVTF)
9625            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9626           UNSPEC_SEL))]
9627   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9628 )
9629
9630 ;; The first alternative doesn't need the earlyclobber, but the only case
9631 ;; it would help is the uninteresting one in which operands 2 and 3 are
9632 ;; the same register (despite having different modes).  Making all the
9633 ;; alternatives earlyclobber makes things more consistent for the
9634 ;; register allocator.
9635 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
9636   [(set (match_operand:SVE_FULL_F 0 "register_operand")
9637         (unspec:SVE_FULL_F
9638           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9639            (unspec:SVE_FULL_F
9640              [(match_operand 4)
9641               (const_int SVE_RELAXED_GP)
9642               (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9643              SVE_COND_ICVTF)
9644            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9645           UNSPEC_SEL))]
9646   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9647   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9648      [ &w       , Upl , w , 0  ; *              ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9649      [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9650      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9651   }
9652   "&& !rtx_equal_p (operands[1], operands[4])"
9653   {
9654     operands[4] = copy_rtx (operands[1]);
9655   }
9656 )
9657
9658 (define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
9659   [(set (match_operand:SVE_FULL_F 0 "register_operand")
9660         (unspec:SVE_FULL_F
9661           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
9662            (unspec:SVE_FULL_F
9663              [(match_dup 1)
9664               (const_int SVE_STRICT_GP)
9665               (match_operand:SVE_FULL_HSDI 2 "register_operand")]
9666              SVE_COND_ICVTF)
9667            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
9668           UNSPEC_SEL))]
9669   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
9670   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9671      [ &w       , Upl , w , 0  ; *              ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9672      [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9673      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
9674   }
9675 )
9676
9677 ;; Predicated widening integer-to-float conversion with merging.
9678 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9679   [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9680         (unspec:VNx2DF_ONLY
9681           [(match_operand:VNx2BI 1 "register_operand")
9682            (unspec:VNx2DF_ONLY
9683              [(match_dup 1)
9684               (const_int SVE_STRICT_GP)
9685               (match_operand:VNx4SI_ONLY 2 "register_operand")]
9686              SVE_COND_ICVTF)
9687            (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9688           UNSPEC_SEL))]
9689   "TARGET_SVE"
9690 )
9691
9692 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
9693   [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
9694         (unspec:VNx2DF_ONLY
9695           [(match_operand:VNx2BI 1 "register_operand")
9696            (unspec:VNx2DF_ONLY
9697              [(match_dup 1)
9698               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9699               (match_operand:VNx4SI_ONLY 2 "register_operand")]
9700              SVE_COND_ICVTF)
9701            (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
9702           UNSPEC_SEL))]
9703   "TARGET_SVE"
9704   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9705      [ w        , Upl , w , 0  ; *              ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9706      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9707      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
9708   }
9709 )
9710
9711 ;; -------------------------------------------------------------------------
9712 ;; ---- [FP<-INT] Packs
9713 ;; -------------------------------------------------------------------------
9714 ;; No patterns here yet!
9715 ;; -------------------------------------------------------------------------
9716
9717 ;; -------------------------------------------------------------------------
9718 ;; ---- [FP<-INT] Unpacks
9719 ;; -------------------------------------------------------------------------
9720 ;; The patterns in this section are synthetic.
9721 ;; -------------------------------------------------------------------------
9722
9723 ;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
9724 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
9725 ;; unpacked VNx4SI to VNx2DF.
9726 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
9727   [(match_operand:VNx2DF 0 "register_operand")
9728    (FLOATUORS:VNx2DF
9729      (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
9730                     UNPACK_UNSIGNED))]
9731   "TARGET_SVE"
9732   {
9733     /* Use ZIP to do the unpack, since we don't care about the upper halves
9734        and since it has the nice property of not needing any subregs.
9735        If using UUNPK* turns out to be preferable, we could model it as
9736        a ZIP whose first operand is zero.  */
9737     rtx temp = gen_reg_rtx (VNx4SImode);
9738     emit_insn ((<hi_lanes_optab>
9739                 ? gen_aarch64_sve_zip2vnx4si
9740                 : gen_aarch64_sve_zip1vnx4si)
9741                (temp, operands[1], operands[1]));
9742     rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
9743     rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9744     emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
9745                (operands[0], ptrue, temp, strictness));
9746     DONE;
9747   }
9748 )
9749
9750 ;; -------------------------------------------------------------------------
9751 ;; ---- [FP<-FP] Packs
9752 ;; -------------------------------------------------------------------------
9753 ;; Includes:
9754 ;; - FCVT
9755 ;; -------------------------------------------------------------------------
9756
9757 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
9758 ;; the results into a single vector.
9759 (define_expand "vec_pack_trunc_<Vwide>"
9760   [(set (match_dup 4)
9761         (unspec:SVE_FULL_HSF
9762           [(match_dup 3)
9763            (const_int SVE_RELAXED_GP)
9764            (match_operand:<VWIDE> 1 "register_operand")]
9765           UNSPEC_COND_FCVT))
9766    (set (match_dup 5)
9767         (unspec:SVE_FULL_HSF
9768           [(match_dup 3)
9769            (const_int SVE_RELAXED_GP)
9770            (match_operand:<VWIDE> 2 "register_operand")]
9771           UNSPEC_COND_FCVT))
9772    (set (match_operand:SVE_FULL_HSF 0 "register_operand")
9773         (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
9774   "TARGET_SVE"
9775   {
9776     operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9777     operands[4] = gen_reg_rtx (<MODE>mode);
9778     operands[5] = gen_reg_rtx (<MODE>mode);
9779   }
9780 )
9781
9782 ;; Predicated float-to-float truncation.
9783 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9784   [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9785         (unspec:SVE_FULL_HSF
9786           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9787            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9788            (match_operand:SVE_FULL_SDF 2 "register_operand")]
9789           SVE_COND_FCVT))]
9790   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9791   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
9792      [ w        , Upl , 0 ; *              ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9793      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9794   }
9795 )
9796
9797 ;; Predicated float-to-float truncation with merging.
9798 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9799   [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9800         (unspec:SVE_FULL_HSF
9801           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9802            (unspec:SVE_FULL_HSF
9803              [(match_dup 1)
9804               (const_int SVE_STRICT_GP)
9805               (match_operand:SVE_FULL_SDF 2 "register_operand")]
9806              SVE_COND_FCVT)
9807            (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
9808           UNSPEC_SEL))]
9809   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9810 )
9811
9812 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
9813   [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
9814         (unspec:SVE_FULL_HSF
9815           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9816            (unspec:SVE_FULL_HSF
9817              [(match_dup 1)
9818               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9819               (match_operand:SVE_FULL_SDF 2 "register_operand")]
9820              SVE_COND_FCVT)
9821            (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
9822           UNSPEC_SEL))]
9823   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9824   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9825      [ w        , Upl , w , 0  ; *              ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9826      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9827      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
9828   }
9829 )
9830
9831 ;; -------------------------------------------------------------------------
9832 ;; ---- [FP<-FP] Packs (bfloat16)
9833 ;; -------------------------------------------------------------------------
9834 ;; Includes:
9835 ;; - BFCVT (BF16)
9836 ;; - BFCVTNT (BF16)
9837 ;; -------------------------------------------------------------------------
9838
9839 ;; Predicated BFCVT.
9840 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9841   [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9842         (unspec:VNx8BF_ONLY
9843           [(match_operand:VNx4BI 1 "register_operand")
9844            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9845            (match_operand:VNx4SF_ONLY 2 "register_operand")]
9846           SVE_COND_FCVT))]
9847   "TARGET_SVE_BF16"
9848   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
9849      [ w        , Upl , 0 ; *              ] bfcvt\t%0.h, %1/m, %2.s
9850      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s
9851   }
9852 )
9853
9854 ;; Predicated BFCVT with merging.
9855 (define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9856   [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9857         (unspec:VNx8BF_ONLY
9858           [(match_operand:VNx4BI 1 "register_operand")
9859            (unspec:VNx8BF_ONLY
9860              [(match_dup 1)
9861               (const_int SVE_STRICT_GP)
9862               (match_operand:VNx4SF_ONLY 2 "register_operand")]
9863              SVE_COND_FCVT)
9864            (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
9865           UNSPEC_SEL))]
9866   "TARGET_SVE_BF16"
9867 )
9868
9869 (define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
9870   [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
9871         (unspec:VNx8BF_ONLY
9872           [(match_operand:VNx4BI 1 "register_operand")
9873            (unspec:VNx8BF_ONLY
9874              [(match_dup 1)
9875               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9876               (match_operand:VNx4SF_ONLY 2 "register_operand")]
9877              SVE_COND_FCVT)
9878            (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
9879           UNSPEC_SEL))]
9880   "TARGET_SVE_BF16"
9881   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9882      [ w        , Upl , w , 0  ; *              ] bfcvt\t%0.h, %1/m, %2.s
9883      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
9884      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s
9885   }
9886 )
9887
9888 ;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
9889 ;; pair because the even elements always have to be supplied for active
9890 ;; elements, even if the inactive elements don't matter.
9891 ;;
9892 ;; This instructions does not take MOVPRFX.
9893 (define_insn "@aarch64_sve_cvtnt<mode>"
9894   [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
9895         (unspec:VNx8BF_ONLY
9896           [(match_operand:VNx4BI 2 "register_operand" "Upl")
9897            (const_int SVE_STRICT_GP)
9898            (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
9899            (match_operand:VNx4SF 3 "register_operand" "w")]
9900           UNSPEC_COND_FCVTNT))]
9901   "TARGET_SVE_BF16"
9902   "bfcvtnt\t%0.h, %2/m, %3.s"
9903 )
9904
9905 ;; -------------------------------------------------------------------------
9906 ;; ---- [FP<-FP] Unpacks
9907 ;; -------------------------------------------------------------------------
9908 ;; Includes:
9909 ;; - FCVT
9910 ;; -------------------------------------------------------------------------
9911
9912 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
9913 ;; First unpack the source without conversion, then float-convert the
9914 ;; unpacked source.
9915 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
9916   [(match_operand:<VWIDE> 0 "register_operand")
9917    (unspec:SVE_FULL_HSF
9918      [(match_operand:SVE_FULL_HSF 1 "register_operand")]
9919      UNPACK_UNSIGNED)]
9920   "TARGET_SVE"
9921   {
9922     /* Use ZIP to do the unpack, since we don't care about the upper halves
9923        and since it has the nice property of not needing any subregs.
9924        If using UUNPK* turns out to be preferable, we could model it as
9925        a ZIP whose first operand is zero.  */
9926     rtx temp = gen_reg_rtx (<MODE>mode);
9927     emit_insn ((<hi_lanes_optab>
9928                 ? gen_aarch64_sve_zip2<mode>
9929                 : gen_aarch64_sve_zip1<mode>)
9930                 (temp, operands[1], operands[1]));
9931     rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
9932     rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
9933     emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
9934                (operands[0], ptrue, temp, strictness));
9935     DONE;
9936   }
9937 )
9938
9939 ;; Predicated float-to-float extension.
9940 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9941   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9942         (unspec:SVE_FULL_SDF
9943           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9944            (match_operand:SI 3 "aarch64_sve_gp_strictness")
9945            (match_operand:SVE_FULL_HSF 2 "register_operand")]
9946           SVE_COND_FCVT))]
9947   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9948   {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
9949      [ w        , Upl , 0 ; *              ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9950      [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9951   }
9952 )
9953
9954 ;; Predicated float-to-float extension with merging.
9955 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9956   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9957         (unspec:SVE_FULL_SDF
9958           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9959            (unspec:SVE_FULL_SDF
9960              [(match_dup 1)
9961               (const_int SVE_STRICT_GP)
9962               (match_operand:SVE_FULL_HSF 2 "register_operand")]
9963              SVE_COND_FCVT)
9964            (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
9965           UNSPEC_SEL))]
9966   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9967 )
9968
9969 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
9970   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
9971         (unspec:SVE_FULL_SDF
9972           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
9973            (unspec:SVE_FULL_SDF
9974              [(match_dup 1)
9975               (match_operand:SI 4 "aarch64_sve_gp_strictness")
9976               (match_operand:SVE_FULL_HSF 2 "register_operand")]
9977              SVE_COND_FCVT)
9978            (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
9979           UNSPEC_SEL))]
9980   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
9981   {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
9982      [ w        , Upl , w , 0  ; *              ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9983      [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9984      [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
9985   }
9986 )
9987
9988 ;; -------------------------------------------------------------------------
9989 ;; ---- [PRED<-PRED] Packs
9990 ;; -------------------------------------------------------------------------
9991 ;; Includes:
9992 ;; - UZP1
9993 ;; -------------------------------------------------------------------------
9994
9995 ;; Predicate pack.  Use UZP1 on the narrower type, which discards
9996 ;; the high part of each wide element.
9997 (define_insn "vec_pack_trunc_<Vwide>"
9998   [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
9999         (unspec:PRED_BHS
10000           [(match_operand:<VWIDE> 1 "register_operand" "Upa")
10001            (match_operand:<VWIDE> 2 "register_operand" "Upa")]
10002           UNSPEC_PACK))]
10003   "TARGET_SVE"
10004   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
10005 )
10006
10007 ;; -------------------------------------------------------------------------
10008 ;; ---- [PRED<-PRED] Unpacks
10009 ;; -------------------------------------------------------------------------
10010 ;; Includes:
10011 ;; - PUNPKHI
10012 ;; - PUNPKLO
10013 ;; -------------------------------------------------------------------------
10014
10015 ;; Unpack the low or high half of a predicate, where "high" refers to
10016 ;; the low-numbered lanes for big-endian and the high-numbered lanes
10017 ;; for little-endian.
10018 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
10019   [(match_operand:<VWIDE> 0 "register_operand")
10020    (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
10021                    UNPACK)]
10022   "TARGET_SVE"
10023   {
10024     emit_insn ((<hi_lanes_optab>
10025                 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
10026                 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
10027                (operands[0], operands[1]));
10028     DONE;
10029   }
10030 )
10031
10032 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
10033   [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
10034         (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
10035                         UNPACK_UNSIGNED))]
10036   "TARGET_SVE"
10037   "punpk<perm_hilo>\t%0.h, %1.b"
10038 )
10039
10040 ;; =========================================================================
10041 ;; == Vector partitioning
10042 ;; =========================================================================
10043
10044 ;; -------------------------------------------------------------------------
10045 ;; ---- [PRED] Unary partitioning
10046 ;; -------------------------------------------------------------------------
10047 ;; Includes:
10048 ;; - BRKA
10049 ;; - BRKAS
10050 ;; - BRKB
10051 ;; - BRKBS
10052 ;; -------------------------------------------------------------------------
10053
10054 ;; Note that unlike most other instructions that have both merging and
10055 ;; zeroing forms, these instructions don't operate elementwise and so
10056 ;; don't fit the IFN_COND model.
10057 (define_insn "@aarch64_brk<brk_op>"
10058   [(set (match_operand:VNx16BI 0 "register_operand")
10059         (unspec:VNx16BI
10060           [(match_operand:VNx16BI 1 "register_operand")
10061            (match_operand:VNx16BI 2 "register_operand")
10062            (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero")]
10063           SVE_BRK_UNARY))]
10064   "TARGET_SVE"
10065   {@ [ cons: =0 , 1   , 2   , 3  ; attrs: pred_clobber ]
10066      [ &Upa     ,  Upa , Upa , Dz; yes                 ] brk<brk_op>\t%0.b, %1/z, %2.b
10067      [ ?Upa     ,  0Upa, 0Upa, Dz; yes                 ] ^
10068      [ Upa      ,  Upa , Upa , Dz; no                  ] ^
10069      [ &Upa     ,  Upa , Upa , 0 ; yes                 ] brk<brk_op>\t%0.b, %1/m, %2.b
10070      [ ?Upa     ,  0Upa, 0Upa, 0 ; yes                 ] ^
10071      [ Upa      ,  Upa , Upa , 0 ; no                  ] ^
10072   }
10073 )
10074
10075 ;; Same, but also producing a flags result.
10076 (define_insn "*aarch64_brk<brk_op>_cc"
10077   [(set (reg:CC_NZC CC_REGNUM)
10078         (unspec:CC_NZC
10079           [(match_operand:VNx16BI 1 "register_operand")
10080            (match_dup 1)
10081            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
10082            (unspec:VNx16BI
10083              [(match_dup 1)
10084               (match_operand:VNx16BI 2 "register_operand")
10085               (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
10086              SVE_BRK_UNARY)]
10087           UNSPEC_PTEST))
10088    (set (match_operand:VNx16BI 0 "register_operand")
10089         (unspec:VNx16BI
10090           [(match_dup 1)
10091            (match_dup 2)
10092            (match_dup 3)]
10093           SVE_BRK_UNARY))]
10094   "TARGET_SVE"
10095   {@ [ cons: =0, 1   , 2   ; attrs: pred_clobber ]
10096      [ &Upa    , Upa , Upa ; yes                 ] brk<brk_op>s\t%0.b, %1/z, %2.b
10097      [ ?Upa    , 0Upa, 0Upa; yes                 ] ^
10098      [ Upa     , Upa , Upa ; no                  ] ^
10099   }
10100 )
10101
10102 ;; Same, but with only the flags result being interesting.
10103 (define_insn "*aarch64_brk<brk_op>_ptest"
10104   [(set (reg:CC_NZC CC_REGNUM)
10105         (unspec:CC_NZC
10106           [(match_operand:VNx16BI 1 "register_operand")
10107            (match_dup 1)
10108            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
10109            (unspec:VNx16BI
10110              [(match_dup 1)
10111               (match_operand:VNx16BI 2 "register_operand")
10112               (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
10113              SVE_BRK_UNARY)]
10114           UNSPEC_PTEST))
10115    (clobber (match_scratch:VNx16BI 0))]
10116   "TARGET_SVE"
10117   {@ [ cons: =0, 1   , 2   ; attrs: pred_clobber ]
10118      [ &Upa    , Upa , Upa ; yes                 ] brk<brk_op>s\t%0.b, %1/z, %2.b
10119      [ ?Upa    , 0Upa, 0Upa; yes                 ] ^
10120      [ Upa     , Upa , Upa ; no                  ] ^
10121   }
10122 )
10123
10124 ;; -------------------------------------------------------------------------
10125 ;; ---- [PRED] Binary partitioning
10126 ;; -------------------------------------------------------------------------
10127 ;; Includes:
10128 ;; - BRKN
10129 ;; - BRKNS
10130 ;; - BRKPA
10131 ;; - BRKPAS
10132 ;; - BRKPB
10133 ;; - BRKPBS
10134 ;; -------------------------------------------------------------------------
10135
10136 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
10137 (define_insn "@aarch64_brk<brk_op>"
10138   [(set (match_operand:VNx16BI 0 "register_operand")
10139         (unspec:VNx16BI
10140           [(match_operand:VNx16BI 1 "register_operand")
10141            (match_operand:VNx16BI 2 "register_operand")
10142            (match_operand:VNx16BI 3 "register_operand")]
10143           SVE_BRK_BINARY))]
10144   "TARGET_SVE"
10145   {@ [ cons: =0,  1  , 2   , 3             ; attrs: pred_clobber ]
10146      [ &Upa    , Upa , Upa , <brk_reg_con> ; yes                 ] brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b
10147      [ ?Upa    , 0Upa, 0Upa, 0<brk_reg_con>; yes                 ] ^
10148      [ Upa     , Upa , Upa , <brk_reg_con> ; no                  ] ^
10149   }
10150 )
10151
10152 ;; BRKN, producing both a predicate and a flags result.  Unlike other
10153 ;; flag-setting instructions, these flags are always set wrt a ptrue.
10154 (define_insn_and_rewrite "*aarch64_brkn_cc"
10155   [(set (reg:CC_NZC CC_REGNUM)
10156         (unspec:CC_NZC
10157           [(match_operand:VNx16BI 4)
10158            (match_operand:VNx16BI 5)
10159            (const_int SVE_KNOWN_PTRUE)
10160            (unspec:VNx16BI
10161              [(match_operand:VNx16BI 1 "register_operand" "Upa")
10162               (match_operand:VNx16BI 2 "register_operand" "Upa")
10163               (match_operand:VNx16BI 3 "register_operand" "0")]
10164              UNSPEC_BRKN)]
10165           UNSPEC_PTEST))
10166    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
10167         (unspec:VNx16BI
10168           [(match_dup 1)
10169            (match_dup 2)
10170            (match_dup 3)]
10171           UNSPEC_BRKN))]
10172   "TARGET_SVE"
10173   "brkns\t%0.b, %1/z, %2.b, %0.b"
10174   "&& (operands[4] != CONST0_RTX (VNx16BImode)
10175        || operands[5] != CONST0_RTX (VNx16BImode))"
10176   {
10177     operands[4] = CONST0_RTX (VNx16BImode);
10178     operands[5] = CONST0_RTX (VNx16BImode);
10179   }
10180 )
10181
10182 ;; Same, but with only the flags result being interesting.
10183 (define_insn_and_rewrite "*aarch64_brkn_ptest"
10184   [(set (reg:CC_NZC CC_REGNUM)
10185         (unspec:CC_NZC
10186           [(match_operand:VNx16BI 4)
10187            (match_operand:VNx16BI 5)
10188            (const_int SVE_KNOWN_PTRUE)
10189            (unspec:VNx16BI
10190              [(match_operand:VNx16BI 1 "register_operand" "Upa")
10191               (match_operand:VNx16BI 2 "register_operand" "Upa")
10192               (match_operand:VNx16BI 3 "register_operand" "0")]
10193              UNSPEC_BRKN)]
10194           UNSPEC_PTEST))
10195    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
10196   "TARGET_SVE"
10197   "brkns\t%0.b, %1/z, %2.b, %0.b"
10198   "&& (operands[4] != CONST0_RTX (VNx16BImode)
10199        || operands[5] != CONST0_RTX (VNx16BImode))"
10200   {
10201     operands[4] = CONST0_RTX (VNx16BImode);
10202     operands[5] = CONST0_RTX (VNx16BImode);
10203   }
10204 )
10205
10206 ;; BRKPA and BRKPB, producing both a predicate and a flags result.
10207 (define_insn "*aarch64_brk<brk_op>_cc"
10208   [(set (reg:CC_NZC CC_REGNUM)
10209         (unspec:CC_NZC
10210           [(match_operand:VNx16BI 1 "register_operand")
10211            (match_dup 1)
10212            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
10213            (unspec:VNx16BI
10214              [(match_dup 1)
10215               (match_operand:VNx16BI 2 "register_operand")
10216               (match_operand:VNx16BI 3 "register_operand")]
10217              SVE_BRKP)]
10218           UNSPEC_PTEST))
10219    (set (match_operand:VNx16BI 0 "register_operand")
10220         (unspec:VNx16BI
10221           [(match_dup 1)
10222            (match_dup 2)
10223            (match_dup 3)]
10224           SVE_BRKP))]
10225   "TARGET_SVE"
10226   {@ [ cons: =0, 1   , 2   , 3   , 4; attrs: pred_clobber ]
10227      [ &Upa    , Upa , Upa , Upa ,  ; yes                 ] brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b
10228      [ ?Upa    , 0Upa, 0Upa, 0Upa,  ; yes                 ] ^
10229      [ Upa     , Upa , Upa , Upa ,  ; no                  ] ^
10230   }
10231 )
10232
10233 ;; Same, but with only the flags result being interesting.
10234 (define_insn "*aarch64_brk<brk_op>_ptest"
10235   [(set (reg:CC_NZC CC_REGNUM)
10236         (unspec:CC_NZC
10237           [(match_operand:VNx16BI 1 "register_operand")
10238            (match_dup 1)
10239            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
10240            (unspec:VNx16BI
10241              [(match_dup 1)
10242               (match_operand:VNx16BI 2 "register_operand")
10243               (match_operand:VNx16BI 3 "register_operand")]
10244              SVE_BRKP)]
10245           UNSPEC_PTEST))
10246    (clobber (match_scratch:VNx16BI 0))]
10247   "TARGET_SVE"
10248   {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
10249      [ &Upa    , Upa , Upa , Upa ; yes                 ] brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b
10250      [ ?Upa    , 0Upa, 0Upa, 0Upa; yes                 ] ^
10251      [ Upa     , Upa , Upa , Upa ; no                  ] ^
10252   }
10253 )
10254
10255 ;; -------------------------------------------------------------------------
10256 ;; ---- [PRED] Scalarization
10257 ;; -------------------------------------------------------------------------
10258 ;; Includes:
10259 ;; - PFIRST
10260 ;; - PNEXT
10261 ;; -------------------------------------------------------------------------
10262
10263 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
10264   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
10265         (unspec:PRED_ALL
10266           [(match_operand:PRED_ALL 1 "register_operand" "Upa")
10267            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
10268            (match_operand:PRED_ALL 3 "register_operand" "0")]
10269           SVE_PITER))
10270    (clobber (reg:CC_NZC CC_REGNUM))]
10271   "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
10272   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
10273 )
10274
10275 ;; Same, but also producing a flags result.
10276 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
10277   [(set (reg:CC_NZC CC_REGNUM)
10278         (unspec:CC_NZC
10279           [(match_operand:VNx16BI 1 "register_operand" "Upa")
10280            (match_operand 2)
10281            (match_operand:SI 3 "aarch64_sve_ptrue_flag")
10282            (unspec:PRED_ALL
10283              [(match_operand 4)
10284               (match_operand:SI 5 "aarch64_sve_ptrue_flag")
10285               (match_operand:PRED_ALL 6 "register_operand" "0")]
10286              SVE_PITER)]
10287           UNSPEC_PTEST))
10288    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
10289         (unspec:PRED_ALL
10290           [(match_dup 4)
10291            (match_dup 5)
10292            (match_dup 6)]
10293           SVE_PITER))]
10294   "TARGET_SVE
10295    && <max_elem_bits> >= <elem_bits>
10296    && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
10297   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
10298   "&& !rtx_equal_p (operands[2], operands[4])"
10299   {
10300     operands[4] = operands[2];
10301     operands[5] = operands[3];
10302   }
10303 )
10304
10305 ;; Same, but with only the flags result being interesting.
10306 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
10307   [(set (reg:CC_NZC CC_REGNUM)
10308         (unspec:CC_NZC
10309           [(match_operand:VNx16BI 1 "register_operand" "Upa")
10310            (match_operand 2)
10311            (match_operand:SI 3 "aarch64_sve_ptrue_flag")
10312            (unspec:PRED_ALL
10313              [(match_operand 4)
10314               (match_operand:SI 5 "aarch64_sve_ptrue_flag")
10315               (match_operand:PRED_ALL 6 "register_operand" "0")]
10316              SVE_PITER)]
10317           UNSPEC_PTEST))
10318    (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
10319   "TARGET_SVE
10320    && <max_elem_bits> >= <elem_bits>
10321    && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
10322   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
10323   "&& !rtx_equal_p (operands[2], operands[4])"
10324   {
10325     operands[4] = operands[2];
10326     operands[5] = operands[3];
10327   }
10328 )
10329
10330 ;; =========================================================================
10331 ;; == Counting elements
10332 ;; =========================================================================
10333
10334 ;; -------------------------------------------------------------------------
10335 ;; ---- [INT] Count elements in a pattern (scalar)
10336 ;; -------------------------------------------------------------------------
10337 ;; Includes:
10338 ;; - CNTB
10339 ;; - CNTD
10340 ;; - CNTH
10341 ;; - CNTW
10342 ;; -------------------------------------------------------------------------
10343
10344 ;; Count the number of elements in an svpattern.  Operand 1 is the pattern,
10345 ;; operand 2 is the number of elements that fit in a 128-bit block, and
10346 ;; operand 3 is a multiplier in the range [1, 16].
10347 ;;
10348 ;; Note that this pattern isn't used for SV_ALL (but would work for that too).
10349 (define_insn "aarch64_sve_cnt_pat"
10350   [(set (match_operand:DI 0 "register_operand" "=r")
10351         (zero_extend:DI
10352           (unspec:SI [(match_operand:DI 1 "const_int_operand")
10353                       (match_operand:DI 2 "const_int_operand")
10354                       (match_operand:DI 3 "const_int_operand")]
10355                      UNSPEC_SVE_CNT_PAT)))]
10356   "TARGET_SVE"
10357   {
10358     return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
10359   }
10360 )
10361
10362 ;; -------------------------------------------------------------------------
10363 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
10364 ;; -------------------------------------------------------------------------
10365 ;; Includes:
10366 ;; - INC
10367 ;; - SQINC
10368 ;; - UQINC
10369 ;; -------------------------------------------------------------------------
10370
10371 ;; Increment a DImode register by the number of elements in an svpattern.
10372 ;; See aarch64_sve_cnt_pat for the counting behavior.
10373 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10374   [(set (match_operand:DI 0 "register_operand" "=r")
10375         (ANY_PLUS:DI (zero_extend:DI
10376                        (unspec:SI [(match_operand:DI 2 "const_int_operand")
10377                                    (match_operand:DI 3 "const_int_operand")
10378                                    (match_operand:DI 4 "const_int_operand")]
10379                                   UNSPEC_SVE_CNT_PAT))
10380                      (match_operand:DI_ONLY 1 "register_operand" "0")))]
10381   "TARGET_SVE"
10382   {
10383     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
10384                                                  operands + 2);
10385   }
10386 )
10387
10388 ;; Increment an SImode register by the number of elements in an svpattern
10389 ;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
10390 ;; behavior.
10391 (define_insn "*aarch64_sve_incsi_pat"
10392   [(set (match_operand:SI 0 "register_operand" "=r")
10393         (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
10394                              (match_operand:DI 3 "const_int_operand")
10395                              (match_operand:DI 4 "const_int_operand")]
10396                             UNSPEC_SVE_CNT_PAT)
10397                  (match_operand:SI 1 "register_operand" "0")))]
10398   "TARGET_SVE"
10399   {
10400     return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
10401   }
10402 )
10403
10404 ;; Increment an SImode register by the number of elements in an svpattern
10405 ;; using saturating arithmetic, extending the result to 64 bits.
10406 ;;
10407 ;; See aarch64_sve_cnt_pat for the counting behavior.
10408 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10409   [(set (match_operand:DI 0 "register_operand" "=r")
10410         (<paired_extend>:DI
10411           (SAT_PLUS:SI
10412             (unspec:SI [(match_operand:DI 2 "const_int_operand")
10413                         (match_operand:DI 3 "const_int_operand")
10414                         (match_operand:DI 4 "const_int_operand")]
10415                        UNSPEC_SVE_CNT_PAT)
10416             (match_operand:SI_ONLY 1 "register_operand" "0"))))]
10417   "TARGET_SVE"
10418   {
10419     const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
10420     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
10421                                                  operands + 2);
10422   }
10423 )
10424
10425 ;; -------------------------------------------------------------------------
10426 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
10427 ;; -------------------------------------------------------------------------
10428 ;; Includes:
10429 ;; - INC
10430 ;; - SQINC
10431 ;; - UQINC
10432 ;; -------------------------------------------------------------------------
10433
10434 ;; Increment a vector of DIs by the number of elements in an svpattern.
10435 ;; See aarch64_sve_cnt_pat for the counting behavior.
10436 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10437   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10438         (ANY_PLUS:VNx2DI
10439           (vec_duplicate:VNx2DI
10440             (zero_extend:DI
10441               (unspec:SI [(match_operand:DI 2 "const_int_operand")
10442                           (match_operand:DI 3 "const_int_operand")
10443                           (match_operand:DI 4 "const_int_operand")]
10444                          UNSPEC_SVE_CNT_PAT)))
10445           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
10446   "TARGET_SVE"
10447   {
10448     if (which_alternative == 1)
10449       output_asm_insn ("movprfx\t%0, %1", operands);
10450     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10451                                                  operands + 2);
10452   }
10453   [(set_attr "movprfx" "*,yes")]
10454 )
10455
10456 ;; Increment a vector of SIs by the number of elements in an svpattern.
10457 ;; See aarch64_sve_cnt_pat for the counting behavior.
10458 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10459   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10460         (ANY_PLUS:VNx4SI
10461           (vec_duplicate:VNx4SI
10462             (unspec:SI [(match_operand:DI 2 "const_int_operand")
10463                         (match_operand:DI 3 "const_int_operand")
10464                         (match_operand:DI 4 "const_int_operand")]
10465                        UNSPEC_SVE_CNT_PAT))
10466           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
10467   "TARGET_SVE"
10468   {
10469     if (which_alternative == 1)
10470       output_asm_insn ("movprfx\t%0, %1", operands);
10471     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10472                                                  operands + 2);
10473   }
10474   [(set_attr "movprfx" "*,yes")]
10475 )
10476
10477 ;; Increment a vector of HIs by the number of elements in an svpattern.
10478 ;; See aarch64_sve_cnt_pat for the counting behavior.
10479 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
10480   [(set (match_operand:VNx8HI 0 "register_operand")
10481         (ANY_PLUS:VNx8HI
10482           (vec_duplicate:VNx8HI
10483             (truncate:HI
10484               (unspec:SI [(match_operand:DI 2 "const_int_operand")
10485                           (match_operand:DI 3 "const_int_operand")
10486                           (match_operand:DI 4 "const_int_operand")]
10487                          UNSPEC_SVE_CNT_PAT)))
10488           (match_operand:VNx8HI_ONLY 1 "register_operand")))]
10489   "TARGET_SVE"
10490 )
10491
10492 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
10493   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10494         (ANY_PLUS:VNx8HI
10495           (vec_duplicate:VNx8HI
10496             (match_operator:HI 5 "subreg_lowpart_operator"
10497               [(unspec:SI [(match_operand:DI 2 "const_int_operand")
10498                            (match_operand:DI 3 "const_int_operand")
10499                            (match_operand:DI 4 "const_int_operand")]
10500                           UNSPEC_SVE_CNT_PAT)]))
10501           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
10502   "TARGET_SVE"
10503   {
10504     if (which_alternative == 1)
10505       output_asm_insn ("movprfx\t%0, %1", operands);
10506     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10507                                                  operands + 2);
10508   }
10509   [(set_attr "movprfx" "*,yes")]
10510 )
10511
10512 ;; -------------------------------------------------------------------------
10513 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
10514 ;; -------------------------------------------------------------------------
10515 ;; Includes:
10516 ;; - DEC
10517 ;; - SQDEC
10518 ;; - UQDEC
10519 ;; -------------------------------------------------------------------------
10520
10521 ;; Decrement a DImode register by the number of elements in an svpattern.
10522 ;; See aarch64_sve_cnt_pat for the counting behavior.
10523 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10524   [(set (match_operand:DI 0 "register_operand" "=r")
10525         (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
10526                       (zero_extend:DI
10527                         (unspec:SI [(match_operand:DI 2 "const_int_operand")
10528                                     (match_operand:DI 3 "const_int_operand")
10529                                     (match_operand:DI 4 "const_int_operand")]
10530                                    UNSPEC_SVE_CNT_PAT))))]
10531   "TARGET_SVE"
10532   {
10533     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
10534                                                  operands + 2);
10535   }
10536 )
10537
10538 ;; Decrement an SImode register by the number of elements in an svpattern
10539 ;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
10540 ;; behavior.
10541 (define_insn "*aarch64_sve_decsi_pat"
10542   [(set (match_operand:SI 0 "register_operand" "=r")
10543         (minus:SI (match_operand:SI 1 "register_operand" "0")
10544                   (unspec:SI [(match_operand:DI 2 "const_int_operand")
10545                               (match_operand:DI 3 "const_int_operand")
10546                               (match_operand:DI 4 "const_int_operand")]
10547                              UNSPEC_SVE_CNT_PAT)))]
10548   "TARGET_SVE"
10549   {
10550     return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
10551   }
10552 )
10553
10554 ;; Decrement an SImode register by the number of elements in an svpattern
10555 ;; using saturating arithmetic, extending the result to 64 bits.
10556 ;;
10557 ;; See aarch64_sve_cnt_pat for the counting behavior.
10558 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10559   [(set (match_operand:DI 0 "register_operand" "=r")
10560         (<paired_extend>:DI
10561           (SAT_MINUS:SI
10562             (match_operand:SI_ONLY 1 "register_operand" "0")
10563             (unspec:SI [(match_operand:DI 2 "const_int_operand")
10564                         (match_operand:DI 3 "const_int_operand")
10565                         (match_operand:DI 4 "const_int_operand")]
10566                        UNSPEC_SVE_CNT_PAT))))]
10567   "TARGET_SVE"
10568   {
10569     const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
10570     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
10571                                                  operands + 2);
10572   }
10573 )
10574
10575 ;; -------------------------------------------------------------------------
10576 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
10577 ;; -------------------------------------------------------------------------
10578 ;; Includes:
10579 ;; - DEC
10580 ;; - SQDEC
10581 ;; - UQDEC
10582 ;; -------------------------------------------------------------------------
10583
10584 ;; Decrement a vector of DIs by the number of elements in an svpattern.
10585 ;; See aarch64_sve_cnt_pat for the counting behavior.
10586 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10587   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
10588         (ANY_MINUS:VNx2DI
10589           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
10590           (vec_duplicate:VNx2DI
10591             (zero_extend:DI
10592               (unspec:SI [(match_operand:DI 2 "const_int_operand")
10593                           (match_operand:DI 3 "const_int_operand")
10594                           (match_operand:DI 4 "const_int_operand")]
10595                          UNSPEC_SVE_CNT_PAT)))))]
10596   "TARGET_SVE"
10597   {
10598     if (which_alternative == 1)
10599       output_asm_insn ("movprfx\t%0, %1", operands);
10600     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10601                                                  operands + 2);
10602   }
10603   [(set_attr "movprfx" "*,yes")]
10604 )
10605
10606 ;; Decrement a vector of SIs by the number of elements in an svpattern.
10607 ;; See aarch64_sve_cnt_pat for the counting behavior.
10608 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
10609   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
10610         (ANY_MINUS:VNx4SI
10611           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
10612           (vec_duplicate:VNx4SI
10613             (unspec:SI [(match_operand:DI 2 "const_int_operand")
10614                         (match_operand:DI 3 "const_int_operand")
10615                         (match_operand:DI 4 "const_int_operand")]
10616                        UNSPEC_SVE_CNT_PAT))))]
10617   "TARGET_SVE"
10618   {
10619     if (which_alternative == 1)
10620       output_asm_insn ("movprfx\t%0, %1", operands);
10621     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10622                                                  operands + 2);
10623   }
10624   [(set_attr "movprfx" "*,yes")]
10625 )
10626
10627 ;; Decrement a vector of HIs by the number of elements in an svpattern.
10628 ;; See aarch64_sve_cnt_pat for the counting behavior.
10629 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
10630   [(set (match_operand:VNx8HI 0 "register_operand")
10631         (ANY_MINUS:VNx8HI
10632           (match_operand:VNx8HI_ONLY 1 "register_operand")
10633           (vec_duplicate:VNx8HI
10634             (truncate:HI
10635               (unspec:SI [(match_operand:DI 2 "const_int_operand")
10636                           (match_operand:DI 3 "const_int_operand")
10637                           (match_operand:DI 4 "const_int_operand")]
10638                          UNSPEC_SVE_CNT_PAT)))))]
10639   "TARGET_SVE"
10640 )
10641
10642 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
10643   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
10644         (ANY_MINUS:VNx8HI
10645           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
10646           (vec_duplicate:VNx8HI
10647             (match_operator:HI 5 "subreg_lowpart_operator"
10648               [(unspec:SI [(match_operand:DI 2 "const_int_operand")
10649                            (match_operand:DI 3 "const_int_operand")
10650                            (match_operand:DI 4 "const_int_operand")]
10651                           UNSPEC_SVE_CNT_PAT)]))))]
10652   "TARGET_SVE"
10653   {
10654     if (which_alternative == 1)
10655       output_asm_insn ("movprfx\t%0, %1", operands);
10656     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
10657                                                  operands + 2);
10658   }
10659   [(set_attr "movprfx" "*,yes")]
10660 )
10661
10662 ;; -------------------------------------------------------------------------
10663 ;; ---- [INT] Count elements in a predicate (scalar)
10664 ;; -------------------------------------------------------------------------
10665 ;; Includes:
10666 ;; - CNTP
10667 ;; -------------------------------------------------------------------------
10668
10669 ;; Count the number of set bits in a predicate.  Operand 3 is true if
10670 ;; operand 1 is known to be all-true.
10671 (define_insn "@aarch64_pred_cntp<mode>"
10672   [(set (match_operand:DI 0 "register_operand" "=r")
10673         (zero_extend:DI
10674           (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
10675                       (match_operand:SI 2 "aarch64_sve_ptrue_flag")
10676                       (match_operand:PRED_ALL 3 "register_operand" "Upa")]
10677                      UNSPEC_CNTP)))]
10678   "TARGET_SVE"
10679   "cntp\t%x0, %1, %3.<Vetype>")
10680
10681 ;; -------------------------------------------------------------------------
10682 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
10683 ;; -------------------------------------------------------------------------
10684 ;; Includes:
10685 ;; - INCP
10686 ;; - SQINCP
10687 ;; - UQINCP
10688 ;; -------------------------------------------------------------------------
10689
10690 ;; Increment a DImode register by the number of set bits in a predicate.
10691 ;; See aarch64_sve_cntp for a description of the operands.
10692 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10693   [(set (match_operand:DI 0 "register_operand")
10694         (ANY_PLUS:DI
10695           (zero_extend:DI
10696             (unspec:SI [(match_dup 3)
10697                         (const_int SVE_KNOWN_PTRUE)
10698                         (match_operand:PRED_ALL 2 "register_operand")]
10699                        UNSPEC_CNTP))
10700           (match_operand:DI_ONLY 1 "register_operand")))]
10701   "TARGET_SVE"
10702   {
10703     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10704   }
10705 )
10706
10707 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10708   [(set (match_operand:DI 0 "register_operand" "=r")
10709         (ANY_PLUS:DI
10710           (zero_extend:DI
10711             (unspec:SI [(match_operand 3)
10712                         (const_int SVE_KNOWN_PTRUE)
10713                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10714                        UNSPEC_CNTP))
10715           (match_operand:DI_ONLY 1 "register_operand" "0")))]
10716   "TARGET_SVE"
10717   "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
10718   "&& !CONSTANT_P (operands[3])"
10719   {
10720     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10721   }
10722 )
10723
10724 ;; Increment an SImode register by the number of set bits in a predicate
10725 ;; using modular arithmetic.  See aarch64_sve_cntp for a description of
10726 ;; the operands.
10727 (define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
10728   [(set (match_operand:SI 0 "register_operand" "=r")
10729         (plus:SI
10730           (unspec:SI [(match_operand 3)
10731                       (const_int SVE_KNOWN_PTRUE)
10732                       (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10733                      UNSPEC_CNTP)
10734           (match_operand:SI 1 "register_operand" "0")))]
10735   "TARGET_SVE"
10736   "incp\t%x0, %2.<Vetype>"
10737   "&& !CONSTANT_P (operands[3])"
10738   {
10739     operands[3] = CONSTM1_RTX (<MODE>mode);
10740   }
10741 )
10742
10743 ;; Increment an SImode register by the number of set bits in a predicate
10744 ;; using saturating arithmetic, extending the result to 64 bits.
10745 ;;
10746 ;; See aarch64_sve_cntp for a description of the operands.
10747 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10748   [(set (match_operand:DI 0 "register_operand")
10749         (<paired_extend>:DI
10750           (SAT_PLUS:SI
10751             (unspec:SI [(match_dup 3)
10752                         (const_int SVE_KNOWN_PTRUE)
10753                         (match_operand:PRED_ALL 2 "register_operand")]
10754                        UNSPEC_CNTP)
10755             (match_operand:SI_ONLY 1 "register_operand"))))]
10756   "TARGET_SVE"
10757   {
10758     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10759   }
10760 )
10761
10762 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10763   [(set (match_operand:DI 0 "register_operand" "=r")
10764         (<paired_extend>:DI
10765           (SAT_PLUS:SI
10766             (unspec:SI [(match_operand 3)
10767                         (const_int SVE_KNOWN_PTRUE)
10768                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10769                        UNSPEC_CNTP)
10770             (match_operand:SI_ONLY 1 "register_operand" "0"))))]
10771   "TARGET_SVE"
10772   {
10773     if (<CODE> == SS_PLUS)
10774       return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
10775     else
10776       return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
10777   }
10778   "&& !CONSTANT_P (operands[3])"
10779   {
10780     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10781   }
10782 )
10783
10784 ;; -------------------------------------------------------------------------
10785 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
10786 ;; -------------------------------------------------------------------------
10787 ;; Includes:
10788 ;; - INCP
10789 ;; - SQINCP
10790 ;; - UQINCP
10791 ;; -------------------------------------------------------------------------
10792
10793 ;; Increment a vector of DIs by the number of set bits in a predicate.
10794 ;; See aarch64_sve_cntp for a description of the operands.
10795 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10796   [(set (match_operand:VNx2DI 0 "register_operand")
10797         (ANY_PLUS:VNx2DI
10798           (vec_duplicate:VNx2DI
10799             (zero_extend:DI
10800               (unspec:SI
10801                 [(match_dup 3)
10802                  (const_int SVE_KNOWN_PTRUE)
10803                  (match_operand:<VPRED> 2 "register_operand")]
10804                 UNSPEC_CNTP)))
10805           (match_operand:VNx2DI_ONLY 1 "register_operand")))]
10806   "TARGET_SVE"
10807   {
10808     operands[3] = CONSTM1_RTX (<VPRED>mode);
10809   }
10810 )
10811
10812 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10813   [(set (match_operand:VNx2DI 0 "register_operand")
10814         (ANY_PLUS:VNx2DI
10815           (vec_duplicate:VNx2DI
10816             (zero_extend:DI
10817               (unspec:SI
10818                 [(match_operand 3)
10819                  (const_int SVE_KNOWN_PTRUE)
10820                  (match_operand:<VPRED> 2 "register_operand")]
10821                 UNSPEC_CNTP)))
10822           (match_operand:VNx2DI_ONLY 1 "register_operand")))]
10823   "TARGET_SVE"
10824   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
10825      [ w        , 0 , Upa ; *              ] <inc_dec>p\t%0.d, %2
10826      [ ?&w      , w , Upa ; yes            ] movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2
10827   }
10828   "&& !CONSTANT_P (operands[3])"
10829   {
10830     operands[3] = CONSTM1_RTX (<VPRED>mode);
10831   }
10832 )
10833
10834 ;; Increment a vector of SIs by the number of set bits in a predicate.
10835 ;; See aarch64_sve_cntp for a description of the operands.
10836 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10837   [(set (match_operand:VNx4SI 0 "register_operand")
10838         (ANY_PLUS:VNx4SI
10839           (vec_duplicate:VNx4SI
10840             (unspec:SI
10841               [(match_dup 3)
10842                (const_int SVE_KNOWN_PTRUE)
10843                (match_operand:<VPRED> 2 "register_operand")]
10844               UNSPEC_CNTP))
10845           (match_operand:VNx4SI_ONLY 1 "register_operand")))]
10846   "TARGET_SVE"
10847   {
10848     operands[3] = CONSTM1_RTX (<VPRED>mode);
10849   }
10850 )
10851
10852 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10853   [(set (match_operand:VNx4SI 0 "register_operand")
10854         (ANY_PLUS:VNx4SI
10855           (vec_duplicate:VNx4SI
10856             (unspec:SI
10857               [(match_operand 3)
10858                (const_int SVE_KNOWN_PTRUE)
10859                (match_operand:<VPRED> 2 "register_operand")]
10860               UNSPEC_CNTP))
10861           (match_operand:VNx4SI_ONLY 1 "register_operand")))]
10862   "TARGET_SVE"
10863   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
10864      [ w        , 0 , Upa ; *              ] <inc_dec>p\t%0.s, %2
10865      [ ?&w      , w , Upa ; yes            ] movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2
10866   }
10867   "&& !CONSTANT_P (operands[3])"
10868   {
10869     operands[3] = CONSTM1_RTX (<VPRED>mode);
10870   }
10871 )
10872
10873 ;; Increment a vector of HIs by the number of set bits in a predicate.
10874 ;; See aarch64_sve_cntp for a description of the operands.
10875 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
10876   [(set (match_operand:VNx8HI 0 "register_operand")
10877         (ANY_PLUS:VNx8HI
10878           (vec_duplicate:VNx8HI
10879             (truncate:HI
10880               (unspec:SI
10881                 [(match_dup 3)
10882                  (const_int SVE_KNOWN_PTRUE)
10883                  (match_operand:<VPRED> 2 "register_operand")]
10884                 UNSPEC_CNTP)))
10885           (match_operand:VNx8HI_ONLY 1 "register_operand")))]
10886   "TARGET_SVE"
10887   {
10888     operands[3] = CONSTM1_RTX (<VPRED>mode);
10889   }
10890 )
10891
10892 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
10893   [(set (match_operand:VNx8HI 0 "register_operand")
10894         (ANY_PLUS:VNx8HI
10895           (vec_duplicate:VNx8HI
10896             (match_operator:HI 3 "subreg_lowpart_operator"
10897               [(unspec:SI
10898                  [(match_operand 4)
10899                   (const_int SVE_KNOWN_PTRUE)
10900                   (match_operand:<VPRED> 2 "register_operand")]
10901                  UNSPEC_CNTP)]))
10902           (match_operand:VNx8HI_ONLY 1 "register_operand")))]
10903   "TARGET_SVE"
10904   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
10905      [ w        , 0 , Upa ; *              ] <inc_dec>p\t%0.h, %2
10906      [ ?&w      , w , Upa ; yes            ] movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2
10907   }
10908   "&& !CONSTANT_P (operands[4])"
10909   {
10910     operands[4] = CONSTM1_RTX (<VPRED>mode);
10911   }
10912 )
10913
10914 ;; -------------------------------------------------------------------------
10915 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
10916 ;; -------------------------------------------------------------------------
10917 ;; Includes:
10918 ;; - DECP
10919 ;; - SQDECP
10920 ;; - UQDECP
10921 ;; -------------------------------------------------------------------------
10922
10923 ;; Decrement a DImode register by the number of set bits in a predicate.
10924 ;; See aarch64_sve_cntp for a description of the operands.
10925 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10926   [(set (match_operand:DI 0 "register_operand")
10927         (ANY_MINUS:DI
10928           (match_operand:DI_ONLY 1 "register_operand")
10929           (zero_extend:DI
10930             (unspec:SI [(match_dup 3)
10931                         (const_int SVE_KNOWN_PTRUE)
10932                         (match_operand:PRED_ALL 2 "register_operand")]
10933                        UNSPEC_CNTP))))]
10934   "TARGET_SVE"
10935   {
10936     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10937   }
10938 )
10939
10940 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
10941   [(set (match_operand:DI 0 "register_operand" "=r")
10942         (ANY_MINUS:DI
10943           (match_operand:DI_ONLY 1 "register_operand" "0")
10944           (zero_extend:DI
10945             (unspec:SI [(match_operand 3)
10946                         (const_int SVE_KNOWN_PTRUE)
10947                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10948                        UNSPEC_CNTP))))]
10949   "TARGET_SVE"
10950   "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
10951   "&& !CONSTANT_P (operands[3])"
10952   {
10953     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10954   }
10955 )
10956
10957 ;; Decrement an SImode register by the number of set bits in a predicate
10958 ;; using modular arithmetic.  See aarch64_sve_cntp for a description of the
10959 ;; operands.
10960 (define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
10961   [(set (match_operand:SI 0 "register_operand" "=r")
10962         (minus:SI
10963           (match_operand:SI 1 "register_operand" "0")
10964           (unspec:SI [(match_operand 3)
10965                       (const_int SVE_KNOWN_PTRUE)
10966                       (match_operand:PRED_ALL 2 "register_operand" "Upa")]
10967                      UNSPEC_CNTP)))]
10968   "TARGET_SVE"
10969   "decp\t%x0, %2.<Vetype>"
10970   "&& !CONSTANT_P (operands[3])"
10971   {
10972     operands[3] = CONSTM1_RTX (<MODE>mode);
10973   }
10974 )
10975
10976 ;; Decrement an SImode register by the number of set bits in a predicate
10977 ;; using saturating arithmetic, extending the result to 64 bits.
10978 ;;
10979 ;; See aarch64_sve_cntp for a description of the operands.
10980 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10981   [(set (match_operand:DI 0 "register_operand")
10982         (<paired_extend>:DI
10983           (SAT_MINUS:SI
10984             (match_operand:SI_ONLY 1 "register_operand")
10985             (unspec:SI [(match_dup 3)
10986                         (const_int SVE_KNOWN_PTRUE)
10987                         (match_operand:PRED_ALL 2 "register_operand")]
10988                        UNSPEC_CNTP))))]
10989   "TARGET_SVE"
10990   {
10991     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
10992   }
10993 )
10994
10995 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
10996   [(set (match_operand:DI 0 "register_operand" "=r")
10997         (<paired_extend>:DI
10998           (SAT_MINUS:SI
10999             (match_operand:SI_ONLY 1 "register_operand" "0")
11000             (unspec:SI [(match_operand 3)
11001                         (const_int SVE_KNOWN_PTRUE)
11002                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
11003                        UNSPEC_CNTP))))]
11004   "TARGET_SVE"
11005   {
11006     if (<CODE> == SS_MINUS)
11007       return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
11008     else
11009       return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
11010   }
11011   "&& !CONSTANT_P (operands[3])"
11012   {
11013     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
11014   }
11015 )
11016
11017 ;; -------------------------------------------------------------------------
11018 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
11019 ;; -------------------------------------------------------------------------
11020 ;; Includes:
11021 ;; - DECP
11022 ;; - SQDECP
11023 ;; - UQDECP
11024 ;; -------------------------------------------------------------------------
11025
11026 ;; Decrement a vector of DIs by the number of set bits in a predicate.
11027 ;; See aarch64_sve_cntp for a description of the operands.
11028 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
11029   [(set (match_operand:VNx2DI 0 "register_operand")
11030         (ANY_MINUS:VNx2DI
11031           (match_operand:VNx2DI_ONLY 1 "register_operand")
11032           (vec_duplicate:VNx2DI
11033             (zero_extend:DI
11034               (unspec:SI
11035                 [(match_dup 3)
11036                  (const_int SVE_KNOWN_PTRUE)
11037                  (match_operand:<VPRED> 2 "register_operand")]
11038                 UNSPEC_CNTP)))))]
11039   "TARGET_SVE"
11040   {
11041     operands[3] = CONSTM1_RTX (<VPRED>mode);
11042   }
11043 )
11044
11045 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
11046   [(set (match_operand:VNx2DI 0 "register_operand")
11047         (ANY_MINUS:VNx2DI
11048           (match_operand:VNx2DI_ONLY 1 "register_operand")
11049           (vec_duplicate:VNx2DI
11050             (zero_extend:DI
11051               (unspec:SI
11052                 [(match_operand 3)
11053                  (const_int SVE_KNOWN_PTRUE)
11054                  (match_operand:<VPRED> 2 "register_operand")]
11055                 UNSPEC_CNTP)))))]
11056   "TARGET_SVE"
11057   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
11058      [ w        , 0 , Upa ; *              ] <inc_dec>p\t%0.d, %2
11059      [ ?&w      , w , Upa ; yes            ] movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2
11060   }
11061   "&& !CONSTANT_P (operands[3])"
11062   {
11063     operands[3] = CONSTM1_RTX (<VPRED>mode);
11064   }
11065 )
11066
11067 ;; Decrement a vector of SIs by the number of set bits in a predicate.
11068 ;; See aarch64_sve_cntp for a description of the operands.
11069 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
11070   [(set (match_operand:VNx4SI 0 "register_operand")
11071         (ANY_MINUS:VNx4SI
11072           (match_operand:VNx4SI_ONLY 1 "register_operand")
11073           (vec_duplicate:VNx4SI
11074             (unspec:SI
11075               [(match_dup 3)
11076                (const_int SVE_KNOWN_PTRUE)
11077                (match_operand:<VPRED> 2 "register_operand")]
11078               UNSPEC_CNTP))))]
11079   "TARGET_SVE"
11080   {
11081     operands[3] = CONSTM1_RTX (<VPRED>mode);
11082   }
11083 )
11084
11085 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
11086   [(set (match_operand:VNx4SI 0 "register_operand")
11087         (ANY_MINUS:VNx4SI
11088           (match_operand:VNx4SI_ONLY 1 "register_operand")
11089           (vec_duplicate:VNx4SI
11090             (unspec:SI
11091               [(match_operand 3)
11092                (const_int SVE_KNOWN_PTRUE)
11093                (match_operand:<VPRED> 2 "register_operand")]
11094               UNSPEC_CNTP))))]
11095   "TARGET_SVE"
11096   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
11097      [ w        , 0 , Upa ; *              ] <inc_dec>p\t%0.s, %2
11098      [ ?&w      , w , Upa ; yes            ] movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2
11099   }
11100   "&& !CONSTANT_P (operands[3])"
11101   {
11102     operands[3] = CONSTM1_RTX (<VPRED>mode);
11103   }
11104 )
11105
11106 ;; Decrement a vector of HIs by the number of set bits in a predicate.
11107 ;; See aarch64_sve_cntp for a description of the operands.
11108 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
11109   [(set (match_operand:VNx8HI 0 "register_operand")
11110         (ANY_MINUS:VNx8HI
11111           (match_operand:VNx8HI_ONLY 1 "register_operand")
11112           (vec_duplicate:VNx8HI
11113             (truncate:HI
11114               (unspec:SI
11115                 [(match_dup 3)
11116                  (const_int SVE_KNOWN_PTRUE)
11117                  (match_operand:<VPRED> 2 "register_operand")]
11118                 UNSPEC_CNTP)))))]
11119   "TARGET_SVE"
11120   {
11121     operands[3] = CONSTM1_RTX (<VPRED>mode);
11122   }
11123 )
11124
11125 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
11126   [(set (match_operand:VNx8HI 0 "register_operand")
11127         (ANY_MINUS:VNx8HI
11128           (match_operand:VNx8HI_ONLY 1 "register_operand")
11129           (vec_duplicate:VNx8HI
11130             (match_operator:HI 3 "subreg_lowpart_operator"
11131               [(unspec:SI
11132                  [(match_operand 4)
11133                   (const_int SVE_KNOWN_PTRUE)
11134                   (match_operand:<VPRED> 2 "register_operand")]
11135                  UNSPEC_CNTP)]))))]
11136   "TARGET_SVE"
11137   {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
11138      [ w        , 0 , Upa ; *              ] <inc_dec>p\t%0.h, %2
11139      [ ?&w      , w , Upa ; yes            ] movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2
11140   }
11141   "&& !CONSTANT_P (operands[4])"
11142   {
11143     operands[4] = CONSTM1_RTX (<VPRED>mode);
11144   }
11145 )
11146
11147 (define_insn_and_split "@aarch64_sve_get_neonq_<mode>"
11148   [(set (match_operand:<V128> 0 "register_operand" "=w")
11149           (vec_select:<V128>
11150             (match_operand:SVE_FULL 1 "register_operand" "w")
11151             (match_operand 2 "descending_int_parallel")))]
11152   "TARGET_SVE
11153    && BYTES_BIG_ENDIAN
11154    && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
11155                 GET_MODE_NUNITS (<V128>mode) - 1)"
11156   "#"
11157   "&& reload_completed"
11158   [(set (match_dup 0) (match_dup 1))]
11159   {
11160     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
11161   }
11162 )
11163
11164 (define_insn "@aarch64_sve_set_neonq_<mode>"
11165   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
11166       (unspec:SVE_FULL
11167         [(match_operand:SVE_FULL 1 "register_operand" "w")
11168         (match_operand:<V128> 2 "register_operand" "w")
11169         (match_operand:<VPRED> 3 "register_operand" "Upl")]
11170         UNSPEC_SET_NEONQ))]
11171   "TARGET_SVE
11172    && BYTES_BIG_ENDIAN"
11173   {
11174     operands[2] = lowpart_subreg (<MODE>mode, operands[2],
11175                                   GET_MODE (operands[2]));
11176     return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
11177   }
11178 )